This commit is contained in:
James Hensman 2014-10-22 16:22:38 +01:00
commit 65f408de9a
45 changed files with 1523 additions and 1347 deletions

View file

@ -213,7 +213,7 @@ class Model(Parameterized):
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
@ -231,7 +231,7 @@ class Model(Parameterized):
- 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b),
- 'sgd': stochastic gradient descent (see scipy.optimize.sgd). For experts only!
"""
if self.is_fixed:
raise RuntimeError, "Cannot optimize when everything is fixed"
@ -300,7 +300,7 @@ class Model(Parameterized):
# just check the global ratio
dx = np.zeros(x.shape)
dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)
# evaluate around the point x
f1 = self._objective(x + dx)
f2 = self._objective(x - dx)
@ -381,6 +381,16 @@ class Model(Parameterized):
self.optimizer_array = x
return ret
def _repr_html_(self):
"""Representation of the model in html for notebook display."""
model_details = [['<b>Model</b>', self.name + '<br>'],
['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
["<b>Number of Parameters</b>", '{}<br>'.format(self.size)]]
from operator import itemgetter
to_print = [""] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["<br><b>Parameters</b>:"]
to_print.append(super(Model, self)._repr_html_())
return "\n".join(to_print)
def __str__(self):
model_details = [['Name', self.name],
['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
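A minimal usage sketch of the optimize API documented above, assuming a standard GPy regression model; the 'lbfgsb' name and the max_f_eval kwarg are taken from the docstring:

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * .05
m = GPy.models.GPRegression(X, Y)
m.optimize(optimizer='lbfgsb', max_f_eval=1000)  # kwargs are forwarded to the optimizer
print(m)  # plain-text summary via __str__; a notebook renders _repr_html_ instead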

View file

@ -53,7 +53,9 @@ class Param(Parameterizable, ObsAr):
return obj
def __init__(self, name, input_array, default_constraint=None, *a, **kw):
self._in_init_ = True
super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
self._in_init_ = False
def build_pydot(self,G):
import pydot
@ -249,6 +251,29 @@ class Param(Parameterizable, ObsAr):
if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
else: indstr = ','.join(map(str, ind))
return name + '[' + indstr + ']'
def _repr_html_(self, constr_matrix=None, indices=None, prirs=None, ties=None):
"""Representation of the parameter in html for notebook display."""
filter_ = self._current_slice_
vals = self.flat
if indices is None: indices = self._indices(filter_)
ravi = self._raveled_index(filter_)
if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
if prirs is None: prirs = self.priors.properties_for(ravi)
if ties is None: ties = self._ties_for(ravi)
ties = [' '.join(t) for t in ties]
header_format = """
<tr>
<td><b>{i}</b></td>
<td><b>{x}</b></td>
<td><b>{c}</b></td>
<td><b>{p}</b></td>
<td><b>{t}</b></td>
</tr>"""
header = header_format.format(x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle([''])
return "\n".join(['<table>'] + [header] + ["<tr><td>{i}</td><td align=\"right\">{x}</td><td>{c}</td><td>{p}</td><td>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
filter_ = self._current_slice_
vals = self.flat

View file

@ -18,7 +18,7 @@ import numpy as np
import re
import logging
__updated__ = '2014-09-22'
__updated__ = '2014-10-22'
class HierarchyError(Exception):
"""
@ -99,7 +99,7 @@ class Observable(object):
:param bool trigger_parent: Whether to trigger the parent after self has updated
"""
if not self.update_model():
if not self.update_model() or self._in_init_:
#print "Warning: updates are off, updating the model will do nothing"
return
self._trigger_params_changed(trigger_parent)
@ -784,7 +784,7 @@ class OptimizationHandlable(Indexable):
constraint to it.
"""
self._highest_parent_.tie.collate_gradient()
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
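The change above moves the chain-rule multiplication into the transformation: gradfactor now receives the incoming gradient g[i] and returns the product, instead of returning the bare factor for the caller to multiply. A standalone sketch with the Logexp factor from the transformations file (36. stands in for _lim_val, which is an assumption here):

import numpy as np

f = np.array([.5, 2., 40.])  # constrained (model-space) parameter values
g = np.array([1., -3., .5])  # incoming gradient dL/df

factor = np.where(f > 36., 1., 1. - np.exp(-f))
old_style = g * factor                      # old contract: the caller multiplies
new_style = np.einsum('i,i->i', g, factor)  # new contract: gradfactor multiplies
assert np.allclose(old_style, new_style)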

View file

@ -357,6 +357,35 @@ class Parameterized(Parameterizable):
@property
def _ties_str(self):
return [','.join(x._ties_str) for x in self.flattened_parameters]
def _repr_html_(self, header=True):
"""Representation of the parameters in html for notebook display."""
name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str;
ts = self._ties_str
prirs = self._priors_str
desc = self._description_str; names = self.parameter_names()
nl = max([len(str(x)) for x in names + [name]])
sl = max([len(str(x)) for x in desc + ["Value"]])
cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = "<tr><td>{{name:<{0}s}}</td><td align=\"right\">{{desc:>{1}s}}</td><td>{{const:^{2}s}}</td><td>{{pri:^{3}s}}</td><td>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl)
to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
sep = '-' * (nl + sl + cl + pl + tl + 8 * 2 + 3)
if header:
header = """
<tr>
<td><b>{name}</b>
<td><b>Value</b></td>
<td><b>Constraint</b></td>
<td><b>Prior</b></td>
<td><b>Tied to</b></td>""".format(name=name)
to_print.insert(0, header)
return '<table>' + '\n'.join(to_print) + '\n</table>'
def __str__(self, header=True):
name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str;

View file

@ -9,6 +9,7 @@ from domains import _REAL, _POSITIVE
import warnings
import weakref
class Prior(object):
domain = None
@ -17,13 +18,16 @@ class Prior(object):
def plot(self):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import priors_plots
priors_plots.univariate_plot(self)
def __repr__(self, *args, **kwargs):
return self.__str__()
class Gaussian(Prior):
"""
Implementation of the univariate Gaussian probability function, coupled with random variables.
@ -36,7 +40,8 @@ class Gaussian(Prior):
"""
domain = _REAL
_instances = []
def __new__(cls, mu, sigma): # Singleton:
def __new__(cls, mu, sigma): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -45,6 +50,7 @@ class Gaussian(Prior):
o = super(Prior, cls).__new__(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
@ -52,7 +58,7 @@ class Gaussian(Prior):
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
return "N({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
@ -67,7 +73,8 @@ class Gaussian(Prior):
class Uniform(Prior):
domain = _REAL
_instances = []
def __new__(cls, lower, upper): # Singleton:
def __new__(cls, lower, upper): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -82,10 +89,10 @@ class Uniform(Prior):
self.upper = float(upper)
def __str__(self):
return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
return "[{:.2g}, {:.2g}]".format(self.lower, self.upper)
def lnpdf(self, x):
region = (x>=self.lower) * (x<=self.upper)
region = (x >= self.lower) * (x <= self.upper)
return region
def lnpdf_grad(self, x):
@ -94,6 +101,7 @@ class Uniform(Prior):
def rvs(self, n):
return np.random.uniform(self.lower, self.upper, size=n)
class LogGaussian(Prior):
"""
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
@ -106,7 +114,8 @@ class LogGaussian(Prior):
"""
domain = _POSITIVE
_instances = []
def __new__(cls, mu, sigma): # Singleton:
def __new__(cls, mu, sigma): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -115,6 +124,7 @@ class LogGaussian(Prior):
o = super(Prior, cls).__new__(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
@ -122,7 +132,7 @@ class LogGaussian(Prior):
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
@ -146,7 +156,8 @@ class MultivariateGaussian:
"""
domain = _REAL
_instances = []
def __new__(cls, mu, var): # Singleton:
def __new__(cls, mu, var): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -155,6 +166,7 @@ class MultivariateGaussian:
o = super(Prior, cls).__new__(cls, mu, var)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, var):
self.mu = np.array(mu).flatten()
self.var = np.array(var)
@ -184,10 +196,13 @@ class MultivariateGaussian:
def plot(self):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import priors_plots
priors_plots.multivariate_plot(self)
def gamma_from_EV(E, V):
warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
return Gamma.from_EV(E, V)
@ -205,7 +220,8 @@ class Gamma(Prior):
"""
domain = _POSITIVE
_instances = []
def __new__(cls, a, b): # Singleton:
def __new__(cls, a, b): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -214,19 +230,20 @@ class Gamma(Prior):
o = super(Prior, cls).__new__(cls, a, b)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
return "Ga({:.2g}, {:.2g})".format(self.a, self.b)
def summary(self):
ret = {"E[x]": self.a / self.b, \
"E[ln x]": digamma(self.a) - np.log(self.b), \
"var[x]": self.a / self.b / self.b, \
"Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
"E[ln x]": digamma(self.a) - np.log(self.b), \
"var[x]": self.a / self.b / self.b, \
"Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
if self.a > 1:
ret['Mode'] = (self.a - 1.) / self.b
else:
@ -241,6 +258,7 @@ class Gamma(Prior):
def rvs(self, n):
return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
@staticmethod
def from_EV(E, V):
"""
@ -275,13 +293,14 @@ class InverseGamma(Prior):
o = super(Prior, cls).__new__(cls, a, b)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
return "iGa({:.2g}, {:.2g})".format(self.a, self.b)
def lnpdf(self, x):
return self.constant - (self.a + 1) * np.log(x) - self.b / x
@ -292,6 +311,349 @@ class InverseGamma(Prior):
def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
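From the summary dict above, a Gamma has E[x] = a/b and var[x] = a/b**2; inverting these two moments gives the rule that Gamma.from_EV presumably implements. A hedged sketch, with the closed form inferred from those moments rather than copied from the source:

def gamma_params_from_EV(E, V):
    # solve E = a/b and V = a/b**2 for the shape a and rate b
    b = E / V
    a = E * b  # equivalently E**2 / V
    return a, b

a, b = gamma_params_from_EV(2., .5)
assert abs(a / b - 2.) < 1e-12 and abs(a / b ** 2 - .5) < 1e-12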
class DGPLVM_KFDA(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable prior, using
the Kernel Fisher Discriminant Analysis of Seung-Jean Kim, as used in the face
verification paper by Chaochao Lu.
:param lambdaa: constant
:param sigma2: constant
.. Note:: DGPLVM implementation for the 'Surpassing Human-Level Face Verification' paper
"""
domain = _REAL
# _instances = []
# def __new__(cls, lambdaa, sigma2): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, lambdaa, sigma2, lbl, kern, x_shape):
"""A description for init"""
self.datanum = lbl.shape[0]
self.classnum = lbl.shape[1]
self.lambdaa = lambdaa
self.sigma2 = sigma2
self.lbl = lbl
self.kern = kern
lst_ni = self.compute_lst_ni()
self.a = self.compute_a(lst_ni)
self.A = self.compute_A(lst_ni)
self.x_shape = x_shape
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# Assign each data point to its class and return a dictionary
# mapping each class label to the data points in that class.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
if len(cls) > 2:
for i in range(2, self.classnum):
del cls[i]
return cls
def x_reduced(self, cls):
x1 = cls[0]
x2 = cls[1]
x = np.concatenate((x1, x2), axis=0)
return x
def compute_lst_ni(self):
lst_ni = []
lst_ni1 = []
lst_ni2 = []
f1 = (np.where(self.lbl[:, 0] == 1)[0])
f2 = (np.where(self.lbl[:, 1] == 1)[0])
for idx in f1:
lst_ni1.append(idx)
for idx in f2:
lst_ni2.append(idx)
lst_ni.append(len(lst_ni1))
lst_ni.append(len(lst_ni2))
return lst_ni
def compute_a(self, lst_ni):
a = np.ones((self.datanum, 1))
count = 0
for N_i in lst_ni:
if N_i == lst_ni[0]:
a[count:count + N_i] = (float(1) / N_i) * a[count]
count += N_i
else:
if N_i == lst_ni[1]:
a[count: count + N_i] = -(float(1) / N_i) * a[count]
count += N_i
return a
def compute_A(self, lst_ni):
A = np.zeros((self.datanum, self.datanum))
idx = 0
for N_i in lst_ni:
B = float(1) / np.sqrt(N_i) * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i))))
A[idx:idx + N_i, idx:idx + N_i] = B
idx += N_i
return A
# Log of the prior density
def lnpdf(self, x):
x = x.reshape(self.x_shape)
K = self.kern.K(x)
a_trans = np.transpose(self.a)
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
inv_part = pdinv(paran)[0]
J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot(self.A).dot(K).dot(self.a)
J_star = (1. / self.lambdaa) * J
return (-1. / self.sigma2) * J_star
# Gradient of the log prior
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
K = self.kern.K(x)
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
inv_part = pdinv(paran)[0]
b = self.A.dot(inv_part).dot(self.A).dot(K).dot(self.a)
a_Minus_b = self.a - b
a_b_trans = np.transpose(a_Minus_b)
DJ_star_DK = (1. / self.lambdaa) * (a_Minus_b.dot(a_b_trans))
DJ_star_DX = self.kern.gradients_X(DJ_star_DK, x)
return (-1. / self.sigma2) * DJ_star_DX
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior'
class DGPLVM(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model, following the classification paper by Raquel Urtasun.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# Assign each data point to its class and return a dictionary
# mapping each class label to the data points in that class.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# Compute the mean of each class, taken along each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
M_i[i] = np.mean(cls[i], axis=0)
return M_i
# Add data points as (index, point) tuples to the dictionary so the indices stay accessible
def compute_indices(self, x):
data_idx = {}
for j in xrange(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Collect the indices of each class into one list of lists
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
lst_idx = []  # start a fresh list for each class (it is empty on the first pass anyway)
# Put the indices of each class into the list called lst_idx_all
for m in xrange(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in xrange(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
# Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
return DPx_Dx.T
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior'
class HalfT(Prior):
"""
Implementation of the half student t probability function, coupled with random variables.
@ -317,7 +679,7 @@ class HalfT(Prior):
self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu)
def __str__(self):
return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')'
return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
def lnpdf(self,theta):
return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
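Every prior in this file shares the same weakref-based singleton __new__: constructing a prior with parameter values already in use hands back the existing instance. A small behavioural sketch, assuming __new__ matches on the constructor arguments as shown for Gaussian above, and that the priors are exposed as GPy.priors:

import GPy

p1 = GPy.priors.Gamma(1., 1.)
p2 = GPy.priors.Gamma(1., 1.)
p3 = GPy.priors.Gamma(2., .5)
assert p1 is p2      # same parameters: the weakly referenced instance is reused
assert p1 is not p3  # different parameters: a fresh instance is created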

View file

@ -27,12 +27,20 @@ class Transformation(object):
if not cls._instance or cls._instance.__class__ is not cls:
cls._instance = super(Transformation, cls).__new__(cls, *args, **kwargs)
return cls._instance
def f(self, x):
def f(self, opt_param):
raise NotImplementedError
def finv(self, x):
def finv(self, model_param):
raise NotImplementedError
def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f"""
def gradfactor(self, model_param, dL_dmodel_param):
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
i.e.:
define
.. math::
\frac{\frac{\partial L}{\partial f}\left(\left.\partial f(x)}{\partial x}\right|_{x=f^{-1}(f)\right)}
"""
raise NotImplementedError
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
@ -58,8 +66,8 @@ class Logexp(Transformation):
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
def gradfactor(self, f):
return np.where(f>_lim_val, 1., 1. - np.exp(-f))
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
@ -68,6 +76,209 @@ class Logexp(Transformation):
return '+ve'
class NormalTheta(Transformation):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def f(self, theta):
# In here abs is only a trick to make sure the numerics are ok.
# The variance will never go below zero, but at initialization we need to make sure
# that the values are ok
# Before:
theta[self.var_indices] = np.abs(-.5/theta[self.var_indices])
theta[self.mu_indices] *= theta[self.var_indices]
return theta # which is now {mu, var}
def finv(self, muvar):
# before:
varp = muvar[self.var_indices]
muvar[self.mu_indices] /= varp
muvar[self.var_indices] = -.5/varp
return muvar # which is now {theta1, theta2}
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*(var)**2
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
#=======================================================================
return dmuvar # which is now the gradient multiplicator for {theta1, theta2}
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
f[self.var_indices] = np.abs(f[self.var_indices])
return f
def __str__(self):
return "theta"
class NormalNaturalAntti(NormalTheta):
_instances = []
_logexp = Logexp()
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*var**2#np.einsum('i,i,i,i->i', dmuvar[self.var_indices], [2], var, var)
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
f[self.var_indices] = np.abs(f[self.var_indices])
return f
def __str__(self):
return "natantti"
class NormalEta(Transformation):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def f(self, theta):
theta[self.var_indices] = np.abs(theta[self.var_indices] - theta[self.mu_indices]**2)
return theta # which is now {mu, var}
def finv(self, muvar):
muvar[self.var_indices] += muvar[self.mu_indices]**2
return muvar # which is now {eta1, eta2}
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
#=======================================================================
# Lets try natural gradients instead: Not working with bfgs... try stochastic!
dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
f[self.var_indices] = np.abs(f[self.var_indices])
return f
def __str__(self):
return "eta"
class NormalNaturalThroughTheta(NormalTheta):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# This is just eta direction:
dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
#=======================================================================
#=======================================================================
# This is by going through theta fully and then going into eta direction:
#dmu = dmuvar[self.mu_indices]
#dmuvar[self.var_indices] += dmu*mu*(var + 4/var)
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def __str__(self):
return "natgrad"
class NormalNaturalThroughEta(NormalEta):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*(var)**2
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
#=======================================================================
return dmuvar
def __str__(self):
return "natgrad"
class LogexpNeg(Transformation):
domain = _POSITIVE
def f(self, x):
@ -75,8 +286,8 @@ class LogexpNeg(Transformation):
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
def gradfactor(self, f):
return np.where(f>_lim_val, -1, -1 + np.exp(-f))
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
@ -92,8 +303,8 @@ class NegativeLogexp(Transformation):
return -self.logexp.f(x) # np.log(1. + np.exp(x))
def finv(self, f):
return self.logexp.finv(-f) # np.log(np.exp(-f) - 1.)
def gradfactor(self, f):
return -self.logexp.gradfactor(-f)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, -self.logexp.gradfactor(-f))
def initialize(self, f):
return -self.logexp.initialize(f) # np.abs(f)
def __str__(self):
@ -125,10 +336,10 @@ class LogexpClipped(Logexp):
return np.clip(f, self.min_bound, self.max_bound)
def finv(self, f):
return np.log(np.exp(f - 1.))
def gradfactor(self, f):
def gradfactor(self, f, df):
ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
gf = (ef - 1.) / ef
return gf # np.where(f < self.lower, 0, gf)
return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
@ -143,8 +354,8 @@ class Exponent(Transformation):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f):
return f
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
@ -158,8 +369,8 @@ class NegativeExponent(Exponent):
return -Exponent.f(x)
def finv(self, f):
return Exponent.finv(-f)
def gradfactor(self, f):
return f
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
return -Exponent.initialize(f) #np.abs(f)
def __str__(self):
@ -171,8 +382,8 @@ class Square(Transformation):
return x ** 2
def finv(self, x):
return np.sqrt(x)
def gradfactor(self, f):
return 2 * np.sqrt(f)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, 2 * np.sqrt(f))
def initialize(self, f):
return np.abs(f)
def __str__(self):
@ -201,8 +412,8 @@ class Logistic(Transformation):
return self.lower + self.difference / (1. + np.exp(-x))
def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f):
return (f - self.lower) * (self.upper - f) / self.difference
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
@ -213,4 +424,3 @@ class Logistic(Transformation):
return '{},{}'.format(self.lower, self.upper)
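A finite-difference check of the new two-argument gradfactor contract, using the Logistic transform above; lower=0, upper=1 and the test values are illustrative:

import numpy as np

lower, upper = 0., 1.
difference = upper - lower

def f(x):  # optimizer space -> model space, as in Logistic.f
    return lower + difference / (1. + np.exp(-x))

def gradfactor(fx, df):  # mirrors Logistic.gradfactor(f, df)
    return np.einsum('i,i->i', df, (fx - lower) * (upper - fx) / difference)

x, dL_df, eps = np.array([.3]), np.array([2.]), 1e-6
numeric = dL_df * (f(x + eps) - f(x - eps)) / (2 * eps)  # dL/dx by central differences
assert np.allclose(gradfactor(f(x), dL_df), numeric, atol=1e-6)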

View file

@ -9,6 +9,11 @@ from .. import likelihoods
from parameterization.variational import VariationalPosterior
import logging
from GPy.inference.latent_function_inference.posterior import Posterior
from GPy.inference.optimization.stochastics import SparseGPStochastics,\
SparseGPMissing
#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
#SparseGPMissing
logger = logging.getLogger("sparse gp")
class SparseGP(GP):
@ -34,12 +39,13 @@ class SparseGP(GP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
#pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC()
inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
@ -49,46 +55,284 @@ class SparseGP(GP):
self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
self.missing_data = missing_data
if stochastic and missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPStochastics(self, batchsize)
elif stochastic and not missing_data:
self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize)
elif missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPMissing(self)
else:
self.stochastics = False
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print message,
for d in xrange(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print ' '*(len(message)+1) + '\r',
message = m_f(d)
print message,
print ''
self.posterior = None
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior):
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
"""
This is the standard part, which usually belongs in parameters_changed.
For automatic handling of subsampling (missing_data, stochastics etc.) it has
to live in an inner loop, so that the gradients of different subsets of the
data can be handled separately.
The dict current_values is passed around for the rest of the algorithm, so
this is the place to store current values, such as subsets, if necessary.
If Lm and dL_dKmm can be precomputed (or only need to be computed once),
pass them in here and they will be forwarded to the inference_method.
subset_indices is a dictionary of indices; store whatever indices the
algorithm needs internally under keys of your choosing.
"""
try:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm)
except TypeError: # the inference method does not accept the precomputed quantities
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = self.grad_dict['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = self.kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X,
Z=self.Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.gradient += target
dL_dKmm = grad_dict['dL_dKmm']
kern.update_gradients_full(dL_dKmm, Z, None)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_expectations(variational_posterior=X,
Z=Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi0'],
self.grad_dict['dL_dpsi1'],
self.grad_dict['dL_dpsi2'],
Z=self.Z,
variational_posterior=self.X)
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
current_values['Zgrad'] += kern.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
Z=Z,
variational_posterior=X)
else:
#gradients wrt kernel
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
target = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
target += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += target
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
current_values['kerngrad'] += kern.gradient
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
"""
This is for automatic updates of values in the inner loop of missing
data handling. Both arguments are dictionaries, and the values in
full_values will be updated by those in current_values.
If a key from current_values does not exist in full_values, it will be
initialized to the value in current_values.
If indices are needed for the update, supply them in value_indices: if
value_indices has the same key as current_values, the update in
full_values will be indexed by the indices in value_indices.
full_values:
dictionary of standing values (you will have to carefully make sure that
the ordering is right!). The values in here will be updated such that
full_values[key] += current_values[key] for all keys in current_values.keys()
current_values:
dictionary of values for the current subset of parameters.
value_indices:
dictionary holding indices for the update in full_values;
if a key exists here, the update rule is:
full_values[key][value_indices[key]] += current_values[key]
"""
for key in current_values.keys():
if value_indices is not None and key in value_indices:
index = value_indices[key]
else:
index = slice(None)
if key in full_values:
full_values[key][index] += current_values[key]
else:
full_values[key] = current_values[key]
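A toy illustration of the update semantics just implemented, with hypothetical keys; note that an indexed entry in current_values must match the shape selected by its indices:

import numpy as np

full = {'kerngrad': np.zeros(3)}
current = {'kerngrad': np.ones(2), 'likgrad': np.array([5.])}
indices = {'kerngrad': np.array([0, 2])}  # only these entries are updated

for key in current:
    idx = indices[key] if key in indices else slice(None)
    if key in full:
        full[key][idx] += current[key]  # indexed in-place accumulation
    else:
        full[key] = current[key]        # unseen key: initialize
# full['kerngrad'] is now [1., 0., 1.] and full['likgrad'] is [5.]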
def _inner_values_update(self, current_values):
"""
This exists in case there is more to do with the current values.
It will always be called in the inner loop, so you can perform additional
updates inside the missing data loop etc. This can also be used for
stochastic updates, when only working on one dimension of the output.
"""
pass
def _outer_values_update(self, full_values):
"""
Here you put the values collected before into the right places,
e.g. set the gradients of the parameters.
"""
self.likelihood.gradient = full_values['likgrad']
self.kern.gradient = full_values['kerngrad']
self.Z.gradient = full_values['Zgrad']
def _outer_init_full_values(self):
"""
If full_values takes indices from value_indices, we might want to initialize
full_values differently, so that subsetting is possible.
Here you can initialize the full_values needed.
Keep in mind that if a key does not exist in full_values when updating
values, it will be set (so e.g. for Z there is no need to initialize Zgrad,
as no subsetting is needed; for X in BGPLVM, on the other hand, we probably
need to initialize the gradients of the mean and the variance, in order to
have the full gradient for indexing).
"""
return {}
def _outer_loop_for_missing_data(self):
Lm = None
dL_dKmm = None
self._log_marginal_likelihood = 0
full_values = self._outer_init_full_values()
if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
else:
woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector
if not self.stochastics:
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1)
print message,
for d in self.stochastics.d:
ninan = self.ninan[:, d]
if not self.stochastics:
print ' '*(len(message)) + '\r',
message = m_f(d)
print message,
posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
self._inner_take_over_or_update(full_values, current_values, value_indices)
self._inner_values_update(current_values)
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print ''
if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
self._outer_values_update(full_values)
def _outer_loop_without_missing_data(self):
self._log_marginal_likelihood = 0
if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
else:
woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector
d = self.stochastics.d
posterior, log_marginal_likelihood, \
grad_dict, current_values, _ = self._inner_parameters_changed(
self.kern, self.X,
self.Z, self.likelihood,
self.Y_normalized[:, d], self.Y_metadata)
self.grad_dict = grad_dict
self._log_marginal_likelihood += log_marginal_likelihood
self._outer_values_update(current_values)
woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
woodbury_vector[:, d] = posterior.woodbury_vector
if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
def parameters_changed(self):
if self.missing_data:
self._outer_loop_for_missing_data()
elif self.stochastics:
self._outer_loop_without_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(full_values)
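A hedged construction sketch for the new missing-data path, mirroring the BayesianGPLVM call in the examples changed later in this commit; the data shapes are illustrative, and missing_data=True is what routes parameters_changed through _outer_loop_for_missing_data:

import numpy as np
import GPy

Y = np.sin(np.random.rand(50, 1) * 6.) + np.random.randn(50, 3) * .1
Y[np.random.rand(*Y.shape) < .2] = np.nan  # knock out roughly 20% of the entries

m = GPy.models.BayesianGPLVM(Y, 2, num_inducing=10, missing_data=True)
m.optimize(messages=True)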
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""

View file

@ -34,15 +34,17 @@ class SparseGP_MPI(SparseGP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
self._IN_OPTIMIZATION_ = False
if mpi_comm != None:
if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer,
missing_data=missing_data, stochastic=stochastic, batchsize=batchsize)
self.update_model(False)
self.link_parameter(self.X, index=0)
if variational_prior is not None:
@ -75,18 +77,18 @@ class SparseGP_MPI(SparseGP):
return dc
#=====================================================
# The MPI parallelization
# The MPI parallelization
# - can move to model at some point
#=====================================================
@SparseGP.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
self.mpi_comm.Bcast(p, root=0)
SparseGP.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None:

View file

@ -12,7 +12,7 @@ from sympy.utilities.iterables import numbered_symbols
import scipy
import GPy
#from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
from GPy.util.symbolic import normcdf, normcdfln, logistic, logisticln, erfcx, erfc, gammaln
#@NDL you removed this file! #from GPy.util.symbolic import normcdf, normcdfln, logistic, logisticln, erfcx, erfc, gammaln
def getFromDict(dataDict, mapList):
return reduce(lambda d, k: d[k], mapList, dataDict)

View file

@ -7,11 +7,6 @@ Gaussian Processes classification
"""
import GPy
try:
import pylab as pb
except:
pass
default_seed = 10000
def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
@ -19,7 +14,9 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
data = GPy.util.datasets.oil()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.oil()
X = data['X']
Xtest = data['Xtest']
Y = data['Y'][:, 0:1]
@ -54,7 +51,9 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@ -71,7 +70,8 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
# Plot
if plot:
fig, axes = pb.subplots(2, 1)
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@ -87,7 +87,9 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@ -107,7 +109,8 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
# Plot
if plot:
fig, axes = pb.subplots(2, 1)
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@ -123,7 +126,9 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@ -137,7 +142,8 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
# Plot
if plot:
fig, axes = pb.subplots(2, 1)
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@ -153,7 +159,9 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@ -171,7 +179,8 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
# Plot
if plot:
fig, axes = pb.subplots(2, 1)
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@ -190,7 +199,9 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
:param kernel: kernel to use in the model
:type kernel: a GPy kernel
"""
data = GPy.util.datasets.crescent_data(seed=seed)
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0

View file

@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np
#default_seed = _np.random.seed(123344)
def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
@ -68,7 +69,8 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
def gplvm_oil_100(optimize=True, verbose=1, plot=True):
import GPy
data = GPy.util.datasets.oil_100()
import pods
data = pods.datasets.oil_100()
Y = data['X']
# create simple GP model
kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
@ -80,8 +82,10 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50):
import GPy
import pods
_np.random.seed(0)
data = GPy.util.datasets.oil()
data = pods.datasets.oil()
Y = data['X'][:N]
Y = Y - Y.mean(0)
Y /= Y.std(0)
@ -98,7 +102,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2):
import GPy
from GPy.util.datasets import swiss_roll_generated
from pods.datasets import swiss_roll_generated
from GPy.models import BayesianGPLVM
data = swiss_roll_generated(num_samples=N, sigma=sigma)
@ -157,9 +161,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
import GPy
from matplotlib import pyplot as plt
import numpy as np
import pods
_np.random.seed(0)
data = GPy.util.datasets.oil()
data = pods.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N]
@ -182,9 +187,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy
from matplotlib import pyplot as plt
import pods
_np.random.seed(0)
data = GPy.util.datasets.oil()
data = pods.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N]
@ -208,12 +214,12 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
Q_signal = 4
import GPy
import numpy as np
np.random.seed(0)
np.random.seed(3000)
k = GPy.kern.Matern32(Q_signal, 1., lengthscale=np.random.uniform(1,6,Q_signal), ARD=1)
k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
K = k.K(t)
s1, s2, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
@ -360,23 +366,19 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
):
from GPy import kern
from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 20% missing data
Ymissing = Y.copy()
Ymissing[inan] = _np.nan
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k)
kernel=k, missing_data=True)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.likelihood.variance = .01
m.parameters_changed()
m.Yreal = Y
if optimize:
@ -445,8 +447,9 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
def brendan_faces(optimize=True, verbose=True, plot=True):
import GPy
import pods
data = GPy.util.datasets.brendan_faces()
data = pods.datasets.brendan_faces()
Q = 2
Y = data['Y']
Yn = Y - Y.mean()
@ -469,8 +472,9 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
def olivetti_faces(optimize=True, verbose=True, plot=True):
import GPy
import pods
data = GPy.util.datasets.olivetti_faces()
data = pods.datasets.olivetti_faces()
Q = 2
Y = data['Y']
Yn = Y - Y.mean()
@@ -490,7 +494,9 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True):
import GPy
data = GPy.util.datasets.osu_run1()
import pods
data = pods.datasets.osu_run1()
# optimize
if range == None:
Y = data['Y'].copy()
@@ -505,8 +511,9 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True):
def stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt
import GPy
import pods
data = GPy.util.datasets.osu_run1()
data = pods.datasets.osu_run1()
# optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
@@ -524,8 +531,9 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt
import GPy
import pods
data = GPy.util.datasets.osu_run1()
data = pods.datasets.osu_run1()
# optimize
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
@@ -543,8 +551,9 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt
import GPy
import pods
data = GPy.util.datasets.osu_run1()
data = pods.datasets.osu_run1()
# optimize
back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
@@ -563,8 +572,9 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
def robot_wireless(optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt
import GPy
import pods
data = GPy.util.datasets.robot_wireless()
data = pods.datasets.robot_wireless()
# optimize
m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25)
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@@ -578,8 +588,9 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt
import numpy as np
import GPy
import pods
data = GPy.util.datasets.osu_run1()
data = pods.datasets.osu_run1()
Q = 6
kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
@@ -609,8 +620,9 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True):
import GPy
import pods
data = GPy.util.datasets.cmu_mocap(subject, motion)
data = pods.datasets.cmu_mocap(subject, motion)
if in_place:
# Make figure move in place.
data['Y'][:, 0:3] = 0.0

View file

@@ -13,7 +13,9 @@ import GPy
def olympic_marathon_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Olympic marathon data."""
data = GPy.util.datasets.olympic_marathon_men()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.olympic_marathon_men()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -82,7 +84,9 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
from the Mount Epomeo runs. Requires gpxpy to be installed on your system
to load in the data.
"""
data = GPy.util.datasets.epomeo_gpx()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.epomeo_gpx()
num_data_list = []
for Xpart in data['X']:
num_data_list.append(Xpart.shape[0])
@@ -107,9 +111,9 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
k = k1**k2
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
m.constrain_fixed('.*rbf_var', 1.)
m.constrain_fixed('iip')
m.constrain_bounded('noise_variance', 1e-3, 1e-1)
m.constrain_fixed('.*variance', 1.)
m.inducing_inputs.constrain_fixed()
m.Gaussian_noise.variance.constrain_bounded(1e-3, 1e-1)
m.optimize(max_iters=max_iters,messages=True)
return m
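# A minimal sketch of the attribute-based constraint style used above, applied
# to a toy model (assumes a GPy build from this revision; the data is made up):
import numpy as np, GPy
Xt = np.random.rand(20, 1); Yt = np.sin(Xt) + 0.05*np.random.randn(20, 1)
mt = GPy.models.GPRegression(Xt, Yt)
mt.rbf.variance.constrain_fixed()                        # replaces m.constrain_fixed('.*rbf_var')
mt.Gaussian_noise.variance.constrain_bounded(1e-3, 1e-1) # replaces the string-based bound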
@@ -125,7 +129,9 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
length_scales = np.linspace(0.1, 60., resolution)
log_SNRs = np.linspace(-3., 4., resolution)
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
# data['Y'] = data['Y'][0::2, :]
# data['X'] = data['X'][0::2, :]
@@ -205,13 +211,15 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
def olympic_100m_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
data = GPy.util.datasets.olympic_100m_men()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.olympic_100m_men()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
# set the lengthscale to be something sensible (defaults to 1)
m['rbf_lengthscale'] = 10
m.rbf.lengthscale = 10
if optimize:
m.optimize('bfgs', max_iters=200)
@@ -222,7 +230,9 @@ def olympic_100m_men(optimize=True, plot=True):
def toy_rbf_1d(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_rbf_1d()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -236,7 +246,9 @@ def toy_rbf_1d(optimize=True, plot=True):
def toy_rbf_1d_50(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d_50()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_rbf_1d_50()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -303,12 +315,11 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
# m.set_prior('.*lengthscale',len_prior)
if optimize:
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
m.optimize(optimizer='scg', max_iters=max_iters)
if plot:
m.kern.plot_ARD()
print m
return m
def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
@@ -343,24 +354,25 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
# m.set_prior('.*lengthscale',len_prior)
if optimize:
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
m.optimize(optimizer='scg', max_iters=max_iters)
if plot:
m.kern.plot_ARD()
print m
return m
def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
"""Predict the location of a robot given wirelss signal strength readings."""
data = GPy.util.datasets.robot_wireless()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.robot_wireless()
# create simple GP Model
m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)
# optimize
if optimize:
m.optimize(messages=True, max_iters=max_iters)
m.optimize(max_iters=max_iters)
Xpredict = m.predict(data['Ytest'])[0]
if plot:
@@ -372,13 +384,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
sse = ((data['Xtest'] - Xpredict)**2).sum()
print m
print('Sum of squares error on test data: ' + str(sse))
return m
def silhouette(max_iters=100, optimize=True, plot=True):
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
data = GPy.util.datasets.silhouette()
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.silhouette()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -390,7 +403,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
print m
return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True):
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs
X = np.random.uniform(-3., 3., (num_samples, 1))
@@ -401,10 +414,10 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
if checkgrad:
m.checkgrad(verbose=1)
m.checkgrad()
if optimize:
m.optimize('tnc', messages=1, max_iters=max_iters)
m.optimize('tnc', max_iters=max_iters)
if plot:
m.plot()

View file

@@ -21,6 +21,13 @@ class EPDTC(LatentFunctionInference):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def on_optimization_start(self):
self._ep_approximation = None
def on_optimization_end(self):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def _get_trYYT(self, Y):
return np.sum(np.square(Y))
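# Sketch of how the new hooks are meant to be used (standalone illustration of
# the pattern, not GPy API beyond the two method names added above):
class _InferenceSketch(object):
    def __init__(self):
        self._ep_approximation = "stale cache"
    def on_optimization_start(self):
        # drop the cached EP approximation so optimization starts from scratch
        self._ep_approximation = None
    def on_optimization_end(self):
        pass

_inf = _InferenceSketch()
_inf.on_optimization_start()
assert _inf._ep_approximation is None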

View file

@@ -82,7 +82,7 @@ class Laplace(LatentFunctionInference):
#define the objective function (to be maximised)
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata)
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
difference = np.inf
iteration = 0
@@ -152,7 +152,7 @@
Ki_W_i = K - C.T.dot(C)
#compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata).sum() - np.sum(np.log(np.diag(L)))
# Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
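# Both fixes account for likelihood.logpdf now returning one value per datum
# rather than a pre-summed scalar, e.g.:
import numpy as np
per_datum = np.array([-1.2, -0.7, -2.1])   # illustrative per-datum log densities
objective_term = np.sum(per_datum)          # the scalar the Laplace objective needs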

View file

@@ -62,7 +62,9 @@ class VarDTC(LatentFunctionInference):
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO: cache this, and make it efficient
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
_, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior)
@@ -84,8 +86,8 @@
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
if Lm is None:
Lm = jitchol(Kmm)
# The rather complex computations of A, and the psi stats
if uncertain_inputs:
@@ -100,7 +102,6 @@
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
psi2 = None
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
@@ -122,11 +123,12 @@
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
if dL_dKmm is None:
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
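# Lm (the Cholesky factor of Kmm) and dL_dKmm can now be supplied by the
# caller, e.g. computed once and reused across repeated inference calls,
# instead of being refactorised inside every call.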
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
@@ -208,6 +210,7 @@ class VarDTCMissingData(LatentFunctionInference):
inan = np.isnan(Y)
has_none = inan.any()
self._inan = ~inan
inan = self._inan
else:
inan = self._inan
has_none = True
@@ -241,7 +244,7 @@
self._subarray_indices = [[slice(None),slice(None)]]
return [Y], [(Y**2).sum()]
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None):
if isinstance(X, VariationalPosterior):
uncertain_inputs = True
psi0_all = kern.psi0(Z, X)
@@ -260,7 +263,7 @@
dL_dpsi0_all = np.zeros(Y.shape[0])
dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
if uncertain_inputs:
dL_dpsi2_all = np.zeros((num_inducing, num_inducing))
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
dL_dR = 0
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
@@ -271,31 +274,31 @@
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
#factor Kmm
Lm = jitchol(Kmm)
if Lm is None:
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm)
size = Y.shape[1]
size = len(Ys)
next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten:
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
next_ten += .1
if het_noise: beta = beta_all[i]
else: beta = beta_all
VVT_factor = (y*beta)
output_dim = 1#len(ind)
psi0 = psi0_all[v]
psi1 = psi1_all[v, :]
if uncertain_inputs: psi2 = kern.psi2(Z, X[v, :])
if uncertain_inputs:
psi2 = kern.psi2(Z, X[v, :])
else: psi2 = None
num_data = psi1.shape[0]
if uncertain_inputs:
if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else: psi2_beta = psi2 * beta
else: psi2_beta = psi2 * (beta)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
@@ -330,11 +333,11 @@ class VarDTCMissingData(LatentFunctionInference):
dL_dpsi0_all[v] += dL_dpsi0
dL_dpsi1_all[v, :] += dL_dpsi1
if uncertain_inputs:
dL_dpsi2_all += dL_dpsi2
dL_dpsi2_all[v, :, :] += dL_dpsi2
# log marginal likelihood
log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit,VVT_factor)
psi0, A, LB, trYYT, data_fit, VVT_factor)
#put the gradients in the right places
dL_dR += _compute_dL_dR(likelihood,
@@ -393,7 +396,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
# subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2

View file

@@ -0,0 +1,53 @@
'''
Created on 9 Oct 2014
@author: maxz
'''
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
"""
Here we want to loop over all dimensions every time.
Thus, we can just make sure the loop goes over self.d every
time.
"""
self.d = xrange(model.Y_normalized.shape[1])
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1):
import itertools
self.batchsize = batchsize
if self.batchsize == 1:
self.dimensions = itertools.cycle(range(model.Y_normalized.shape[1]))
else:
import numpy as np
self.dimensions = lambda: np.random.choice(model.Y_normalized.shape[1], size=batchsize, replace=False)
self.d = None
self.do_stochastics()
def do_stochastics(self):
if self.batchsize == 1:
self.d = [self.dimensions.next()]
else:
self.d = self.dimensions()
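# A minimal usage sketch (illustrative; the stand-in model only needs a
# Y_normalized attribute with the right shape):
import numpy as np
class _ModelStub(object):
    Y_normalized = np.zeros((10, 3))
store = SparseGPStochastics(_ModelStub(), batchsize=1)
seen = []
for _ in range(4):
    seen.append(store.d[0])
    store.do_stochastics()
assert seen == [0, 1, 2, 0]   # batchsize=1 cycles through the output dimensions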

View file

@@ -17,17 +17,3 @@ from _src.poly import Poly
from _src.trunclinear import TruncLinear,TruncLinear_inf
from _src.splitKern import SplitKern,DiffGenomeKern
# TODO: put this in an init file somewhere
#I'm commenting this out because the files were not added. JH. Remember to add the files before committing
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
from _src.symbolic import Symbolic
from _src.eq import Eq
from _src.heat_eqinit import Heat_eqinit
#from _src.ode1_eq_lfm import Ode1_eq_lfm

View file

@@ -1,30 +0,0 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from symbolic import Symbolic
class Eq(Symbolic):
"""
The exponentiated quadratic covariance as a symbolic function.
"""
def __init__(self, input_dim, output_dim=1, variance=1.0, lengthscale=1.0, name='Eq'):
parameters = {'variance' : variance, 'lengthscale' : lengthscale}
x = sym.symbols('x_:' + str(input_dim))
z = sym.symbols('z_:' + str(input_dim))
variance = sym.var('variance',positive=True)
lengthscale = sym.var('lengthscale', positive=True)
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
from sympy.parsing.sympy_parser import parse_expr
dist = parse_expr(dist_string)
# this is the covariance function
f = variance*sym.exp(-dist/(2*lengthscale**2))
# extra input dim is to signify the output dimension.
super(Eq, self).__init__(input_dim=input_dim, k=f, output_dim=output_dim, parameters=parameters, name=name)

View file

@@ -1,30 +0,0 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from symbolic import Symbolic
class Heat_eqinit(Symbolic):
"""
A symbolic covariance based on laying down an initial condition of the heat equation with an exponentiated quadratic covariance. The covariance then has multiple outputs which are interpreted as observations of the diffused process with different diffusion coefficients (or at different times).
"""
def __init__(self, input_dim, output_dim=1, param=None, name='Heat_eqinit'):
x = sym.symbols('x_:' + str(input_dim))
z = sym.symbols('z_:' + str(input_dim))
scale = sym.var('scale_i scale_j',positive=True)
lengthscale = sym.var('lengthscale_i lengthscale_j', positive=True)
shared_lengthscale = sym.var('shared_lengthscale', positive=True)
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
from sympy.parsing.sympy_parser import parse_expr
dist = parse_expr(dist_string)
# this is the covariance function
f = scale_i*scale_j*sym.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
# extra input dim is to signify the output dimension.
super(Heat_eqinit, self).__init__(input_dim=input_dim+1, k=f, output_dim=output_dim, name=name)

View file

@@ -6,6 +6,7 @@ import numpy as np
from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this
from GPy.core.parameterization.observable_array import ObsAr
@@ -48,12 +49,26 @@ class Kern(Parameterized):
if active_dims is None:
active_dims = np.arange(input_dim)
self.active_dims = np.array(active_dims, dtype=int)
self.active_dims = np.atleast_1d(active_dims).astype(int)
assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
@property
def return_psi2_n(self):
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)
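# e.g. k.return_psi2_n = True walks the whole kernel hierarchy via traverse()
# and sets the flag on every sub-kernel, keeping compound kernels consistent.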
@Cache_this(limit=20)
def _slice_X(self, X):
@@ -162,7 +177,7 @@ class Kern(Parameterized):
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __imul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)

View file

@@ -10,7 +10,6 @@ import sslinear_psi_comp
import linear_psi_comp
class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
@@ -19,7 +18,7 @@ class PSICOMP_RBF(Pickleable):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
@@ -28,10 +27,10 @@ class PSICOMP_RBF(Pickleable):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
def _setup_observers(self):
pass
class PSICOMP_Linear(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
@@ -42,7 +41,7 @@ class PSICOMP_Linear(Pickleable):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
@@ -51,6 +50,6 @@ class PSICOMP_Linear(Pickleable):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
def _setup_observers(self):
pass

View file

@@ -17,7 +17,7 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
@@ -41,7 +41,7 @@ def __psi1computations(variance, lengthscale, Z, mu, S):
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
_psi1 = variance*np.exp(_psi1_log)
return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S):
@@ -54,27 +54,27 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 NxMxM (now kept per data point rather than summed over N)
lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
@@ -82,7 +82,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
@@ -101,9 +101,9 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
@@ -114,7 +114,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
@@ -133,21 +133,20 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom)
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2[None,:,:]*_psi2
Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Zhat = Lpsi2Z
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
_dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
@@ -157,6 +156,6 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)
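# The central shape change above: psi2 is now kept per data point (NxMxM), and
# an MxM dL_dpsi2 multiplies it by broadcasting over the leading axis, e.g.:
import numpy as np
N, M = 4, 3
dL_dpsi2 = np.ones((M, M))
psi2 = np.random.rand(N, M, M)
assert (dL_dpsi2*psi2).shape == (N, M, M)   # (M,M) broadcasts against (N,M,M)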

View file

@@ -8,7 +8,8 @@ from ...core.parameterization.transformations import Logexp
from ...util.linalg import tdot
from ... import util
import numpy as np
from scipy import integrate
from scipy import integrate, weave
from ...util.config import config # for assesing whether to use weave
from ...util.caching import Cache_this
class Stationary(Kern):
@@ -71,6 +72,13 @@ class Stationary(Kern):
@Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None):
"""
Kernel function applied on inputs X and X2.
In the stationary case the kernel depends on the inputs only through
the (scaled) distance r between them:
K(X, X2) = K_of_r(r(X, X2))
"""
r = self._scaled_dist(X, X2)
return self.K_of_r(r)
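# For example (sketch, assuming a GPy build from this revision):
import numpy as np, GPy
km = GPy.kern.Matern32(1)
Xs = np.linspace(0., 1., 5)[:, None]
assert np.allclose(km.K(Xs), km.K_of_r(km._scaled_dist(Xs)))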
@@ -124,10 +132,22 @@
return ret
def update_gradients_diag(self, dL_dKdiag, X):
"""
Given the derivative of the objective with respect to the diagonal of
the covariance matrix, compute the derivative wrt the parameters of
this kernel and store it in the <parameter>.gradient field.
See also update_gradients_full
"""
self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt the covariance matrix
(dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient
"""
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
#now the lengthscale gradient(s)
@@ -139,7 +159,17 @@
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@@ -154,10 +184,43 @@
dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
N,M = tmp.shape
Q = X.shape[1]
if hasattr(X, 'values'):X = X.values
if hasattr(X2, 'values'):X2 = X2.values
grads = np.zeros(self.input_dim)
code = """
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads[q] = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
return -grads/self.lengthscale**3
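# The C loop above computes the same inner sum as the numpy fallback (up to the
# final -1/lengthscale**3 factor); in pure numpy the equivalence is:
import numpy as np
N_, M_, Q_ = 5, 4, 3
tmp_ = np.random.rand(N_, M_); X_ = np.random.rand(N_, Q_); X2_ = np.random.rand(M_, Q_)
einsum_grads = np.array([np.einsum('nm,nm->', tmp_, np.square(X_[:, q:q+1] - X2_[:, q:q+1].T)) for q in range(Q_)])
loop_grads = np.zeros(Q_)
for q in range(Q_):
    for n in range(N_):
        for m in range(M_):
            loop_grads[q] += tmp_[n, m]*(X_[n, q] - X2_[m, q])**2
assert np.allclose(einsum_grads, loop_grads)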
def gradients_X(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
"""
if config.getboolean('weave', 'working'):
try:
return self.gradients_X_weave(dL_dK, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
else:
return self.gradients_X_(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
@@ -177,6 +240,36 @@
return ret
def gradients_X_weave(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
code = """
int n,q,d;
double retnd;
for(n=0;n<N;n++){
for(d=0;d<D;d++){
retnd = 0;
for(q=0;q<Q;q++){
retnd += tmp(n,q)*(X(n,d)-X2(q,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
Q = tmp.shape[1]
from scipy import weave
weave.inline(code, ['ret', 'N', 'D', 'Q', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz)
return ret/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
@@ -311,7 +404,7 @@ class Matern52(Stationary):
class ExpQuad(Stationary):
"""
The Exponentiated quadratic covariance function.
The Exponentiated quadratic covariance function.
.. math::

View file

@@ -6,18 +6,3 @@ from poisson import Poisson
from student_t import StudentT
from likelihood import Likelihood
from mixed_noise import MixedNoise
#TODO need to fix this in a config file.
#TODO need to add the files to the git repo!
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
#These are likelihoods that rely on symbolic.
from symbolic import Symbolic
from sstudent_t import SstudentT
from negative_binomial import Negative_binomial
from skew_normal import Skew_normal
from skew_exponential import Skew_exponential
# from null_category import Null_category

View file

@@ -113,10 +113,8 @@ class Bernoulli(Likelihood):
.. Note:
Each y_i must be in {0, 1}
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
objective = np.where(y, inv_link_f, 1.-inv_link_f)
return np.exp(np.sum(np.log(objective)))
return np.where(y, inv_link_f, 1.-inv_link_f)
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
"""
@@ -133,13 +131,9 @@
:returns: log likelihood evaluated at points inverse link of f.
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1}
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
np.seterr(**state)
return np.sum(objective)
p = np.where(y==1, inv_link_f, 1.-inv_link_f)
return np.log(p)
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
@@ -156,13 +150,10 @@
:returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1}
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
np.seterr(**state)
return grad
#grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
denom = np.where(y, inv_link_f, -(1-inv_link_f))
return 1./denom
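# Quick finite-difference check of the rewritten gradient (illustrative only):
import numpy as np
p_ = np.array([0.3, 0.7]); y_ = np.array([1, 0]); eps = 1e-6
logpdf_ = lambda p: np.log(np.where(y_ == 1, p, 1. - p))
fd_ = (logpdf_(p_ + eps) - logpdf_(p_ - eps))/(2*eps)
assert np.allclose(fd_, 1./np.where(y_, p_, -(1. - p_)), atol=1e-5)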
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
@@ -185,13 +176,13 @@
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1}
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
np.seterr(**state)
return d2logpdf_dlink2
#d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
arg = np.where(y, inv_link_f, 1.-inv_link_f)
ret = -1./np.square(np.clip(arg, 1e-3, np.inf))
if np.any(np.isinf(ret)):
raise FloatingPointError("d2logpdf_dlink2: inf encountered despite clipping")
return ret
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
"""

View file

@@ -131,6 +131,56 @@ class Likelihood(Parameterized):
return z, mean, variance
def variational_expectations(self, Y, m, v, gh_points=None):
"""
Use Gauss-Hermite Quadrature to compute
E_p(f) [ log p(y|f) ]
d/dm E_p(f) [ log p(y|f) ]
d/dv E_p(f) [ log p(y|f) ]
where p(f) is a Gaussian with mean m and variance v. The shapes of Y, m and v should match.
If no gh_points are passed, we construct them using default options.
"""
if gh_points is None:
gh_x, gh_w = np.polynomial.hermite.hermgauss(12)
else:
gh_x, gh_w = gh_points
gh_w = gh_w/np.sqrt(np.pi) # normalize the weights so the sums below are expectations under N(m, v)
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelihood for the grid. The first axis indexes the data (and mu, var), the second indexes the grid.
# broadcast needs to be handled carefully.
logp = self.logpdf(X,Y[:,None])
dlogp_dx = self.dlogpdf_df(X, Y[:,None])
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
#clipping for numerical stability
logp = np.clip(logp,-1e6,1e6)
dlogp_dx = np.clip(dlogp_dx,-1e6,1e6)
d2logp_dx2 = np.clip(d2logp_dx2,-1e6,1e6)
#average over the grid to get derivatives of the Gaussian's parameters
F = np.dot(logp, gh_w)
dF_dm = np.dot(dlogp_dx, gh_w)
dF_dv = np.dot(d2logp_dx2, gh_w)/2.
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
raise FloatingPointError("variational_expectations: nan/inf in dF_dv")
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
raise FloatingPointError("variational_expectations: nan/inf in dF_dm")
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape)
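# Gauss-Hermite sanity check in isolation: with normalized weights the same
# node transformation recovers E[f^2] = m^2 + v under N(m, v):
import numpy as np
gh_x_, gh_w_ = np.polynomial.hermite.hermgauss(12)
gh_w_ = gh_w_/np.sqrt(np.pi)
m_, v_ = 0.5, 2.0
f_ = gh_x_*np.sqrt(2.*v_) + m_
assert np.isclose(np.dot(f_**2, gh_w_), m_**2 + v_)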
def predictive_mean(self, mu, variance, Y_metadata=None):
"""
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )

View file

@@ -70,7 +70,6 @@ class Probit(GPTransformation):
.. math::
g(f) = \\Phi^{-1} (mu)
"""
def transf(self,f):
@@ -88,6 +87,34 @@
f2 = f**2
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
class Cloglog(GPTransformation):
"""
Complementary log-log link
.. math::
p(f) = 1 - e^{-e^f}
or
f = \log (-\log(1-p))
"""
def transf(self,f):
return 1-np.exp(-np.exp(f))
def dtransf_df(self,f):
return np.exp(f-np.exp(f))
def d2transf_df2(self,f):
ef = np.exp(f)
return -np.exp(f-ef)*(ef-1.)
def d3transf_df3(self,f):
ef = np.exp(f)
return np.exp(f-ef)*(1.-3*ef + ef**2)
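# Finite-difference check that dtransf_df matches the transformation above
# (illustrative only):
import numpy as np
fv = np.linspace(-2., 2., 9); eps = 1e-6
cloglog = lambda f: 1. - np.exp(-np.exp(f))
fd = (cloglog(fv + eps) - cloglog(fv - eps))/(2*eps)
assert np.allclose(fd, np.exp(fv - np.exp(fv)), atol=1e-6)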
class Log(GPTransformation):
"""
.. math::

View file

@@ -1,47 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln, logisticln
except ImportError:
sympy_available=False
import numpy as np
import link_functions
from symbolic import Symbolic
from scipy import stats
class Negative_binomial(Symbolic):
"""
Negative binomial
.. math::
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y_i!\,\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
.. Note::
Y takes non-negative integer values.
link function should have a positive domain, e.g. log (default).
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, dispersion=1.0):
parameters = {'dispersion':dispersion}
if gp_link is None:
gp_link = link_functions.Identity()
dispersion = sym.Symbol('dispersion', positive=True, real=True)
y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
f_0 = sym.Symbol('f_0', positive=True, real=True)
gp_link = link_functions.Log()
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y_0)*sym.log(dispersion+f_0) + gammaln(y_0+dispersion) - gammaln(y_0+1) - gammaln(dispersion) + y_0*sym.log(f_0)
#log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
super(Negative_binomial, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Negative_binomial')
# TODO: Check this.
self.log_concave = False

View file

@@ -5,7 +5,6 @@ from __future__ import division
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
from likelihood import Likelihood

View file

@@ -1,45 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from GPy.util.symbolic import normcdfln
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
#from GPy.util.functions import clip_exp
import link_functions
from symbolic import Symbolic
from scipy import stats
class Skew_exponential(Symbolic):
"""
Skew exponential distribution.
.. math::
.. Note::
Y takes real values.
link function is identity
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, shape=1.0, scale=1.0):
parameters={'scale':scale, 'shape':shape}
if gp_link is None:
gp_link = link_functions.Identity()
#func_modules = [{'exp':clip_exp}]
scale = sym.Symbol('scale', positive=True, real=True)
shape = sym.Symbol('shape', real=True)
y_0 = sym.Symbol('y_0', real=True)
f_0 = sym.Symbol('f_0', real=True)
log_pdf=sym.log(shape)-sym.log(scale)-((y_0-f_0)/scale) + normcdfln(shape*(y_0-f_0)/scale)
super(Skew_exponential, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Skew_exponential', parameters=parameters)
# TODO: Check this.
self.log_concave = True

View file

@@ -1,52 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import normcdfln, normcdf
except ImportError:
sympy_available=False
import numpy as np
from GPy.util.functions import clip_exp
import link_functions
from symbolic import Symbolic
from scipy import stats
class Skew_normal(Symbolic):
"""
Skew Normal distribution.
.. math::
.. Note::
Y takes real values.
link function is identity
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, shape=1.0, scale=1.0):
parameters = {'scale': scale, 'shape':shape}
if gp_link is None:
gp_link = link_functions.Identity()
# # this likelihood has severe problems with likelihoods saturating exponentials, so clip_exp is used in place of the true exp as a solution for dealing with the numerics.
# func_modules = [{'exp':clip_exp}]
func_modules = []
scale = sym.Symbol('scale', positive=True, real=True)
shape = sym.Symbol('shape', real=True)
y_0 = sym.Symbol('y_0', real=True)
f_0 = sym.Symbol('f_0', real=True)
log_pdf=-sym.log(scale)-1./2*sym.log(2*sym.pi)-1./2*((y_0-f_0)/scale)**2 + sym.log(2) + normcdfln(shape*(y_0-f_0)/scale)
super(Skew_normal, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Skew_normal', func_modules=func_modules)
self.log_concave = True

View file

@@ -1,45 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln
import numpy as np
import link_functions
from symbolic import Symbolic
from scipy import stats
class SstudentT(Symbolic):
"""
Symbolic variant of the Student-t distribution.
.. math::
.. Note::
Y takes real values.
link function is identity
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, deg_free=5.0, t_scale2=1.0):
parameters = {'deg_free':deg_free, 't_scale2':t_scale2}
if gp_link is None:
gp_link = link_functions.Identity()
# this likelihood has severe problems with likelihoods saturating ...
y_0 = sym.Symbol('y_0', real=True)
f_0 = sym.Symbol('f_0', real=True)
nu = sym.Symbol('nu', positive=True, real=True)
t_scale2 = sym.Symbol('t_scale2', positive=True, real=True)
log_pdf = (gammaln((nu + 1) * 0.5)
- gammaln(nu * 0.5)
- 0.5*sym.log(t_scale2 * nu * sym.pi)
- 0.5*(nu + 1)*sym.log(1 + (1/nu)*(((y_0-f_0)**2)/t_scale2)))
super(SstudentT, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='SstudentT')
self.log_concave = False

View file

@@ -1,279 +0,0 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
import numpy as np
from likelihood import Likelihood
from ..core.symbolic import Symbolic_core
class Symbolic(Likelihood, Symbolic_core):
"""
Symbolic likelihood.
Likelihood where the form of the likelihood is provided by a sympy expression.
"""
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, parameters=None, func_modules=[]):
if gp_link is None:
gp_link = link_functions.Identity()
if log_pdf is None:
raise ValueError, "You must provide an argument for the log pdf."
Likelihood.__init__(self, gp_link, name=name)
functions = {'log_pdf':log_pdf}
self.cacheable = ['F', 'Y']
self.missing_data = False
if missing_log_pdf:
self.missing_data = True
functions['missing_log_pdf']=missing_log_pdf
self.ep_analytic = False
if logZ:
self.ep_analytic = True
functions['logZ'] = logZ
self.cacheable += ['M', 'V']
Symbolic_core.__init__(self, functions, cacheable=self.cacheable, derivatives = ['F', 'theta'], parameters=parameters, func_modules=func_modules)
# TODO: Is there an easy way to check whether the pdf is log
self.log_concave = log_concave
def _set_derivatives(self, derivatives):
# these are arguments for computing derivatives.
print "Whoop"
Symbolic_core._set_derivatives(self, derivatives)
# add second and third derivatives for Laplace approximation.
derivative_arguments = []
if derivatives is not None:
for derivative in derivatives:
derivative_arguments += self.variables[derivative]
exprs = ['log_pdf']
if self.missing_data:
exprs.append('missing_log_pdf')
for expr in exprs:
self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative']['f_0'], theta)) for theta in derivative_arguments}
self.expressions[expr]['third_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['second_derivative']['f_0'], theta)) for theta in derivative_arguments}
if self.ep_analytic:
derivative_arguments = [M]
# add second derivative for EP
exprs = ['logZ']
if self.missing_data:
exprs.append('missing_logZ')
for expr in exprs:
self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative'], theta)) for theta in derivative_arguments}
def eval_update_cache(self, Y, **kwargs):
# TODO: place checks for inf/nan in here
# for all provided keywords
Symbolic_core.eval_update_cache(self, Y=Y, **kwargs)
# Y = np.atleast_2d(Y)
# for variable, code in sorted(self.code['parameters_changed'].iteritems()):
# self._set_attribute(variable, eval(code, self.namespace))
# for i, theta in enumerate(self.variables['Y']):
# missing = np.isnan(Y[:, i])
# self._set_attribute('missing_' + str(i), missing)
# self._set_attribute(theta.name, value[missing, i][:, None])
# for variable, value in kwargs.items():
# # update their cached values
# if value is not None:
# if variable == 'F' or variable == 'M' or variable == 'V' or variable == 'Y_metadata':
# for i, theta in enumerate(self.variables[variable]):
# self._set_attribute(theta.name, value[:, i][:, None])
# else:
# self._set_attribute(theta.name, value[:, i])
# for variable, code in sorted(self.code['update_cache'].iteritems()):
# self._set_attribute(variable, eval(code, self.namespace))
def parameters_changed(self):
pass
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
# The way the Laplace approximation is run requires the
# covariance function to compute the true gradient (because it
# is dependent on the mode). This means we actually compute
# the gradient outside this object. This function would
# normally ask the object to update its gradients internally,
# but here it provides them externally, because they are
# computed in the inference code. TODO: Thought: How does this
# affect EP? Shouldn't this be done by a separate
# Laplace-approximation specific call?
for theta, grad in zip(self.variables['theta'], grads):
parameter = getattr(self, theta.name)
setattr(parameter, 'gradient', grad)
def pdf_link(self, f, y, Y_metadata=None):
"""
Likelihood function given inverse link of f.
:param f: inverse link of latent variables.
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
return np.exp(self.logpdf_link(f, y, Y_metadata=None))
def logpdf_link(self, f, y, Y_metadata=None):
"""
Log Likelihood Function given inverse link of latent variables.
:param f: latent variables (inverse link of f)
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
if self.missing_data:
missing_flag = np.isnan(y)
not_missing_flag = np.logical_not(missing_flag)
ll = self.eval_function('missing_log_pdf', F=f[missing_flag]).sum()
ll += self.eval_function('log_pdf', F=f[not_missing_flag], Y=y[not_missing_flag], Y_metadata=Y_metadata[not_missing_flag]).sum()
else:
ll = self.eval_function('log_pdf', F=f, Y=y, Y_metadata=Y_metadata).sum()
return ll
def dlogpdf_dlink(self, f, y, Y_metadata=None):
"""
Gradient of log likelihood with respect to the inverse link function.
:param f: latent variables (inverse link of f)
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: gradient of likelihood with respect to each point.
:rtype: Nx1 array
"""
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
if self.missing_data:
return np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['derivative']['f_0'], self.namespace),
eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
else:
return np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
def d2logpdf_dlink2(self, f, y, Y_metadata=None):
"""
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
:param f: inverse link of the latent variables.
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Returns diagonal of Hessian, since every where else it is
0, as the likelihood factorizes over cases (the
distribution for y_i depends only on link(f_i) not on
link(f_(j!=i))
"""
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
if self.missing_data:
return np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['second_derivative']['f_0'], self.namespace),
eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
else:
return np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
def d3logpdf_dlink3(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
if self.missing_data:
return np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['third_derivative']['f_0'], self.namespace),
eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
else:
return np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
for i, theta in enumerate(self.variables['theta']):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['derivative'][theta.name], self.namespace),
eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
else:
g[:, i:i+1] = np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
return g.sum(0)
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
for i, theta in enumerate(self.variables['theta']):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['second_derivative'][theta.name], self.namespace),
eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
else:
g[:, i:i+1] = np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
return g
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
for i, theta in enumerate(self.variables['theta']):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['third_derivative'][theta.name], self.namespace),
eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
else:
g[:, i:i+1] = np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
return g
def predictive_mean(self, mu, sigma, Y_metadata=None):
raise NotImplementedError
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
raise NotImplementedError
def conditional_mean(self, gp):
raise NotImplementedError
def conditional_variance(self, gp):
raise NotImplementedError
def samples(self, gp, Y_metadata=None):
raise NotImplementedError

View file

@@ -5,13 +5,3 @@ from kernel import Kernel
from linear import Linear
from mlp import MLP
#from rbf import RBF
# TODO need to fix this in a config file.
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
# These are likelihoods that rely on symbolic.
from symbolic import Symbolic

View file

@@ -1,57 +0,0 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from ..core.mapping import Mapping, Bijective_mapping
from ..core.symbolic import Symbolic_core
import numpy as np
class Symbolic(Mapping, Symbolic_core):
"""
Symbolic mapping
Mapping where the form of the mapping is provided by a sympy expression.
"""
def __init__(self, input_dim, output_dim, f=None, name='symbolic', parameters=None, func_modules=[]):
if f is None:
raise ValueError, "You must provide an argument for the function."
Mapping.__init__(self, input_dim, output_dim, name=name)
Symbolic_core.__init__(self, {'f': f}, ['X'], derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
self._initialize_cache()
self.parameters_changed()
def _initialize_cache(self):
self._set_attribute('x_0', np.random.normal(size=(3, self.input_dim)))
def parameters_changed(self):
self.eval_parameters_changed()
def update_cache(self, X=None):
self.eval_update_cache(X=X)
def update_gradients(self, partial, X=None):
for name, val in self.eval_update_gradients('f', partial, X=X).iteritems():
setattr(getattr(self, name), 'gradient', val)
def gradients_X(self, partial, X=None):
return self.eval_gradients_X('f', partial, X=X)
def f(self, X=None):
"""
"""
return self.eval_function('f', X=X)
def df_dX(self, X):
"""
"""
pass
def df_dtheta(self, X):
pass

View file

@@ -5,10 +5,8 @@ import numpy as np
from .. import kern
from ..core.sparse_gp_mpi import SparseGP_MPI
from ..likelihoods import Gaussian
from ..inference.optimization import SCG
from ..util import linalg
from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
import logging
@@ -25,12 +23,14 @@ class BayesianGPLVM(SparseGP_MPI):
"""
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None):
Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False
self.logger = logging.getLogger(self.__class__.__name__)
if X == None:
if X is None:
from ..util.initialization import initialize_latent
self.logger.info("initializing latent space X with method {}".format(init))
X, fracs = initialize_latent(init, input_dim, Y)
@@ -59,24 +59,24 @@
X = NormalPosterior(X, X_variance)
if inference_method is None:
inan = np.isnan(Y)
if np.any(inan):
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
self.logger.debug("creating inference_method with var_dtc missing data")
inference_method = VarDTCMissingData(inan=inan)
elif mpi_comm is not None:
if mpi_comm is not None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC()
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
if isinstance(inference_method,VarDTC_minibatch):
inference_method.mpi_comm = mpi_comm
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True
super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood, name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior)
super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method,
normalizer=normalizer, mpi_comm=mpi_comm,
variational_prior=self.variational_prior,
missing_data=missing_data, stochastic=stochastic,
batchsize=batchsize)
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
@@ -86,15 +86,62 @@ class BayesianGPLVM(SparseGP_MPI):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
kl_fctr = 1.
if self.missing_data:
d = self.output_dim
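# each of the d output dimensions is handled separately in missing-data
# mode, so every per-dimension term carries KL/d (summing back to the full KL)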
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
else:
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
# Subsetting variational posterior objects leaves their gradients
# empty; we need them to be 0 here:
X.mean.gradient[:] = 0
X.variance.gradient[:] = 0
self.variational_prior.update_gradients_KL(X)
if self.missing_data:
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
else:
current_values['meangrad'] += kl_fctr*X.mean.gradient
current_values['vargrad'] += kl_fctr*X.variance.gradient
if subset_indices is not None:
value_indices['meangrad'] = subset_indices['samples']
value_indices['vargrad'] = subset_indices['samples']
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
def _outer_values_update(self, full_values):
"""
Write the values collected before (in _inner_parameters_changed) back to
their proper places, e.g. set the gradients of the parameters.
"""
super(BayesianGPLVM, self)._outer_values_update(full_values)
self.X.mean.gradient = full_values['meangrad']
self.X.variance.gradient = full_values['vargrad']
def _outer_init_full_values(self):
return dict(meangrad=np.zeros(self.X.mean.shape),
vargrad=np.zeros(self.X.variance.shape))
def parameters_changed(self):
super(BayesianGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
return
super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
#super(BayesianGPLVM, self).parameters_changed()
#self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
#self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# This is testing code -------------------------
# i = np.random.randint(self.X.shape[0])
@ -113,7 +160,7 @@ class BayesianGPLVM(SparseGP_MPI):
# -----------------------------------------------
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
#self.variational_prior.update_gradients_KL(self.X)
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
@ -178,7 +225,7 @@ class BayesianGPLVM(SparseGP_MPI):
"""
dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]):
dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX
def dmu_dXnew(self, Xnew):
@ -189,7 +236,7 @@ class BayesianGPLVM(SparseGP_MPI):
ones = np.ones((1, 1))
for i in range(self.Z.shape[0]):
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(gradients_X, self.Cpsi1Vf)
return np.dot(gradients_X, self.grad_dict['dL_dpsi1'])
def plot_steepest_gradient_map(self, *args, ** kwargs):
"""

View file

@ -352,8 +352,8 @@ class TestNoiseModels(object):
print model
#print model._get_params()
np.testing.assert_almost_equal(
model.pdf(f.copy(), Y.copy()),
np.exp(model.logpdf(f.copy(), Y.copy()))
model.pdf(f.copy(), Y.copy()).prod(),
np.exp(model.logpdf(f.copy(), Y.copy()).sum())
)
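# identity used by the tightened check: for independent data points,
# prod_i pdf_i == exp(sum_i logpdf_i)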
@with_setup(setUp, tearDown)

View file

@ -20,7 +20,7 @@ class MiscTests(unittest.TestCase):
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
m.randomize()
m.likelihood.variance = .5
Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.likelihood.variance)
Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N) * m.likelihood.variance)
K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized)
@ -43,7 +43,7 @@ class MiscTests(unittest.TestCase):
Z = m.Z[:]
X = self.X[:]
#Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
# Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv
K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
@ -51,13 +51,13 @@ class MiscTests(unittest.TestCase):
self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(covar.shape, (self.N_new, self.N_new))
np.testing.assert_almost_equal(K_hat, covar)
#np.testing.assert_almost_equal(mu_hat, mu)
# np.testing.assert_almost_equal(mu_hat, mu)
mu, var = m._raw_predict(self.X_new)
self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(var.shape, (self.N_new, 1))
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
#np.testing.assert_almost_equal(mu_hat, mu)
# np.testing.assert_almost_equal(mu_hat, mu)
def test_likelihood_replicate(self):
m = GPy.models.GPRegression(self.X, self.Y)
@ -110,22 +110,45 @@ class MiscTests(unittest.TestCase):
m2.kern.lengthscale = m.kern['.*lengthscale']
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
def test_missing_data(self):
from GPy import kern
from GPy.models import BayesianGPLVM
from GPy.examples.dimensionality_reduction import _simulate_matern
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
Y = Ylist[0]
inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # ~90% missing data
Ymissing = Y.copy()
Ymissing[inan] = np.nan
k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
def test_likelihood_replicate_kern(self):
m = GPy.models.GPRegression(self.X, self.Y)
m2 = GPy.models.GPRegression(self.X, self.Y)
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize()
m2.kern[''] = m.kern[:]
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize()
m2.kern[:] = m.kern[:]
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize()
m2.kern[''] = m.kern['']
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize()
m2.kern[:] = m.kern[''].values()
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
def test_big_model(self):
m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)
@ -158,7 +181,7 @@ class MiscTests(unittest.TestCase):
def test_model_optimize(self):
X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X,Y)
m = GPy.models.GPRegression(X, Y)
m.optimize()
print m
@ -219,8 +242,8 @@ class GradientTests(np.testing.TestCase):
mlp = GPy.kern.MLP(1)
self.check_model(mlp, model_type='GPRegression', dimension=1)
#TODO:
#def test_GPRegression_poly_1d(self):
# TODO:
# def test_GPRegression_poly_1d(self):
# ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
# mlp = GPy.kern.Poly(1, degree=5)
# self.check_model(mlp, model_type='GPRegression', dimension=1)
@ -417,11 +440,11 @@ class GradientTests(np.testing.TestCase):
def test_gp_heteroscedastic_regression(self):
num_obs = 25
X = np.random.randint(0,140,num_obs)
X = X[:,None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None]
X = np.random.randint(0, 140, num_obs)
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPHeteroscedasticRegression(X,Y,kern)
m = GPy.models.GPHeteroscedasticRegression(X, Y, kern)
self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self):
@ -432,12 +455,12 @@ class GradientTests(np.testing.TestCase):
k1 = GPy.kern.RBF(1) # + GPy.kern.White(1)
k2 = GPy.kern.RBF(1) # + GPy.kern.White(1)
Y = np.random.randn(N1, N2)
Y = Y-Y.mean(0)
Y = Y/Y.std(0)
Y = Y - Y.mean(0)
Y = Y / Y.std(0)
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)
# build the model the dumb way
assert (N1*N2<1000), "too much data for standard GPs!"
assert (N1 * N2 < 1000), "too much data for standard GPs!"
yy, xx = np.meshgrid(X2, X1)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
@ -458,11 +481,11 @@ class GradientTests(np.testing.TestCase):
def test_gp_VGPC(self):
num_obs = 25
X = np.random.randint(0,140,num_obs)
X = X[:,None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None]
X = np.random.randint(0, 140, num_obs)
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
self.assertTrue(m.checkgrad())

View file

@ -18,12 +18,3 @@ import multioutput
import linalg_gpu
import mpi
try:
import sympy
_sympy_available = True
del sympy
except ImportError as e:
_sympy_available = False
if _sympy_available:
import symbolic

View file

@ -188,4 +188,6 @@ class Cache_this(object):
self.ignore_args = ignore_args
self.force_args = force_kwargs
def __call__(self, f):
return Cacher_wrap(f, self.limit, self.ignore_args, self.force_args)
newf = Cacher_wrap(f, self.limit, self.ignore_args, self.force_args)
update_wrapper(newf, f)
return newf
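A small sketch of what the added update_wrapper call preserves, assuming functools.update_wrapper is imported at the top of this module and that the decorator accepts a limit keyword:

@Cache_this(limit=3)
def K(X):
    """Compute a kernel matrix."""

print K.__doc__  # "Compute a kernel matrix." rather than Cacher_wrap's docstring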

View file

@ -964,7 +964,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
if not data_available(dataset):
download_data(dataset)
from pandas import read_csv
from pandas import read_csv, isnull
dir_path = os.path.join(data_path, dataset)
# read the info .soft
@ -978,11 +978,26 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):])
sample_info.columns.name = sample_info.columns.name[len("!Sample_"):]
sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']]
sample_info = sample_info.ix[:, np.r_[0:3, 5:sample_info.shape[1]]]
sample_info = sample_info.iloc[:, np.r_[0:4, 5:sample_info.shape[1]]]
c = sample_info.columns.to_series()
c[1:4] = ['strain', 'cross', 'developmental_stage']
sample_info.columns = c
# get the labels right:
rep = re.compile('\(.*\)')
def filter_dev_stage(row):
if isnull(row):
row = "2-cell stage embryo"
if row.startswith("developmental stage: "):
row = row[len("developmental stage: "):]
if row == 'adult':
row += " liver"
row = row.replace(' stage ', ' ')
row = rep.sub(' ', row)
row = row.strip(' ')
return row
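# Illustrative traces of the cleaning above (input formats are assumed):
#   NaN                                          -> "2-cell embryo"
#   "developmental stage: adult"                 -> "adult liver"
#   "developmental stage: 16-cell stage embryo"  -> "16-cell embryo"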
labels = sample_info.developmental_stage.apply(filter_dev_stage)
# Extract the tar file
filename = os.path.join(dir_path, 'GSE45719_Raw.tar')
with tarfile.open(filename, 'r') as files:
@ -1016,8 +1031,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
sample_info.index = data.index
# get the labels from the description
rep = re.compile('fibroblast|\d+-cell|embryo|liver|blastocyst|blastomere|zygote', re.IGNORECASE)
labels = sample_info.developmental_stage.apply(lambda row: " ".join(rep.findall(row)))
#rep = re.compile('fibroblast|\d+-cell|embryo|liver|early blastocyst|mid blastocyst|late blastocyst|blastomere|zygote', re.IGNORECASE)
sys.stdout.write(' '*len(message) + '\r')
sys.stdout.flush()

File diff suppressed because one or more lines are too long

View file

@ -5,15 +5,15 @@ Created on 24 Feb 2014
'''
import numpy as np
from GPy.util.pca import pca
from GPy.util.pca import PCA
def initialize_latent(init, input_dim, Y):
Xr = np.asfortranarray(np.random.randn(Y.shape[0], input_dim))
if init == 'PCA':
p = pca(Y)
p = PCA(Y)
PC = p.project(Y, min(input_dim, Y.shape[1]))
Xr[:PC.shape[0], :PC.shape[1]] = PC
var = p.fracs[:input_dim]
var = .1*p.fracs[:input_dim]
else:
var = Xr.var(0)
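For reference, a typical call as used by the latent-variable models (Y is an N x D data array; names are illustrative):

X0, fracs = initialize_latent('PCA', 2, Y)  # X0 is N x 2; fracs the (scaled) per-dimension variances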

View file

@ -11,14 +11,16 @@ try:
except:
pass
from numpy.linalg.linalg import LinAlgError
from operator import setitem
import itertools
class pca(object):
class PCA(object):
"""
pca module with automatic primal/dual determination.
PCA module with automatic primal/dual determination.
"""
def __init__(self, X):
self.mu = X.mean(0)
self.sigma = X.std(0)
self.mu = None
self.sigma = None
X = self.center(X)
@ -37,8 +39,15 @@ class pca(object):
def center(self, X):
"""
Center `X` in pca space.
Center `X` in PCA space.
"""
X = X.copy()
inan = numpy.isnan(X)
if self.mu is None:
X_ = numpy.ma.masked_array(X, inan)
self.mu = X_.mean(0).base
self.sigma = X_.std(0).base
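# impute: for each column j, overwrite the NaN entries with that column's
# mean, i.e. X.T[j][inan.T[j]] = self.mu[j]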
reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None)
X = X - self.mu
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
return X
@ -57,7 +66,7 @@ class pca(object):
def project(self, X, Q=None):
"""
Project X into pca space, defined by the Q highest eigenvalues.
Project X into PCA space, defined by the Q highest eigenvalues.
Y = X dot V
"""
if Q is None:
@ -71,13 +80,16 @@ class pca(object):
"""
Plot fractions of eigenvalues, sorted in descending order.
"""
from GPy.plotting.matplot_dep import Tango
Tango.reset()
col = Tango.nextMedium()
if ax is None:
fig = pylab.figure(fignum)
ax = fig.add_subplot(111)
if Q is None:
Q = self.Q
ticks = numpy.arange(Q)
bar = ax.bar(ticks - .4, self.fracs[:Q])
bar = ax.bar(ticks - .4, self.fracs[:Q], color=col)
ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1))
ax.set_ylabel("Eigenvalue fraction")
ax.set_xlabel("PC")
@ -94,7 +106,7 @@ class pca(object):
fignum=None, cmap=None, # @UndefinedVariable
** kwargs):
"""
Plot dimensions `dimensions` with given labels against each other in
Plot dimensions `dimensions` with given labels against each other in
PC space. Labels can be any sequence of length X.shape[0].
Labels can be drawn with a subsequent call to legend()
"""

View file

@ -1,367 +0,0 @@
import sys
import numpy as np
import sympy as sym
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
from sympy.matrices import Matrix
########################################
## Try to do some matrix functions: problem, you can't do derivatives
## with respect to matrix functions :-(
class GPySymMatrix(Matrix):
def __init__(self, indices):
Matrix.__init__(self)
def atoms(self):
return [e2 for e in self for e2 in e.atoms()]
class selector(Function):
"""A function that returns an element of a Matrix depending on input indices."""
nargs = 3
def fdiff(self, argindex=1):
return selector(*self.args)
@classmethod
def eval(cls, X, i, j):
if i.is_Number and j.is_Number:
return X[i, j]
##################################################
class logistic(Function):
"""The logistic function as a symbolic function."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return logistic(x)*(1-logistic(x))
@classmethod
def eval(cls, x):
if x.is_Number:
return 1/(1+exp(-x))
class logisticln(Function):
"""The log logistic, which can often be computed with more precision than the simply taking log(logistic(x)) when x is small or large."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return 1-logistic(x)
@classmethod
def eval(cls, x):
if x.is_Number:
return -log(1+exp(-x))  # sympy's log, so symbolic/exact numbers survive
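# e.g. for large x, log(logistic(x)) rounds to log(1.0) == 0, losing the
# true value of about -exp(-x); -log(1+exp(-x)) keeps that small number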
class erfc(Function):
"""The complementary error function, erfc(x) = 1-erf(x). Used as a helper function, particularly for erfcx, the scaled complementary error function. and the normal distributions cdf."""
nargs = 1
@classmethod
def eval(cls, arg):
return 1-erf(arg)
class erfcx(Function):
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return 2*x*erfcx(x)-2/sqrt(pi)
@classmethod
def eval(cls, x):
if x.is_Number:
return exp(x**2)*erfc(x)
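# note: the direct form exp(x**2)*erfc(x) overflows for moderately large x,
# whereas erfcx(x) itself decays like 1/(x*sqrt(pi)) as x -> +inf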
class gammaln(Function):
"""The log of the gamma function, which is often needed instead of log(gamma(x)) for better accuracy for large x."""
nargs = 1
def fdiff(self, argindex=1):
x=self.args[0]
return polygamma(0, x)
@classmethod
def eval(cls, x):
if x.is_Number:
return log(gamma(x))
class normcdfln(Function):
"""The log of the normal cdf. Can often be computed with better accuracy than log(normcdf(x)), particulary when x is either small or large."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
#return -erfcx(-x/sqrt(2))/sqrt(2*pi)
#return exp(-normcdfln(x) - 0.5*x*x)/sqrt(2*pi)
return sqrt(2/pi)*1/erfcx(-x/sqrt(2))
@classmethod
def eval(cls, x):
if x.is_Number:
return log(normcdf(x))
def _eval_is_real(self):
return self.args[0].is_real
class normcdf(Function):
"""The cumulative distribution function of the standard normal. Provided as a convenient helper function. It is computed throught -0.5*erfc(-x/sqrt(2))."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return gaussian(x)
@classmethod
def eval(cls, x):
if x.is_Number:
return 0.5*(erfc(-x/sqrt(2)))
def _eval_is_real(self):
return self.args[0].is_real
class normalln(Function):
"""The log of the standard normal distribution."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return -x
@classmethod
def eval(cls, x):
if x.is_Number:
return -0.5*log(2*pi) - 0.5*x*x
def _eval_is_real(self):
return True
class normal(Function):
"""The standard normal distribution. Provided as a convenience function."""
nargs = 1
@classmethod
def eval(cls, x):
return 1/sqrt(2*pi)*exp(-0.5*x*x)
def _eval_is_real(self):
return True
class differfln(Function):
nargs = 2
def fdiff(self, argindex=2):
if argindex == 2:
x0, x1 = self.args
return -2/(sqrt(pi)*(erfcx(x1)-exp(x1**2-x0**2)*erfcx(x0)))
elif argindex == 1:
x0, x1 = self.args
return 2/(sqrt(pi)*(exp(x0**2-x1**2)*erfcx(x1)-erfcx(x0)))
else:
raise ArgumentIndexError(self, argindex)
@classmethod
def eval(cls, x0, x1):
if x0.is_Number and x1.is_Number:
return log(erfc(x1)-erfc(x0))
class dh_dd_i(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
diff_t = (t-tprime)
l2 = l*l
h_val = h(t, tprime, d_i, d_j, l)  # avoid shadowing the h class defined below
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
base = ((0.5*d_i*l2*(d_i+d_j)-1)*h_val
+ (-diff_t*sign_val*exp(half_l_di*half_l_di
-d_i*diff_t
+ln_part_1)
+t*sign_val*exp(half_l_di*half_l_di
-d_i*t-d_j*tprime
+ln_part_2))
+ l/sqrt(pi)*(-exp(-diff_t*diff_t/l2)
+exp(-tprime*tprime/l2-d_i*t)
+exp(-t*t/l2-d_j*tprime)
-exp(-(d_i*t + d_j*tprime))))
return base/(d_i+d_j)
class dh_dd_j(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
diff_t = (t-tprime)
l2 = l*l
half_l_di = 0.5*l*d_i
h_val = h(t, tprime, d_i, d_j, l)  # avoid shadowing the h class defined below
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-h_val
return base/(d_i+d_j)
class dh_dl(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
diff_t = (t-tprime)
l2 = l*l
h_val = h(t, tprime, d_i, d_j, l)  # avoid shadowing the h class defined below
return 0.5*d_i*d_i*l*h_val + 2./(sqrt(pi)*(d_i+d_j))*((-diff_t/l2-d_i/2.)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2.)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2.)*exp(-t*t/l2-d_j*tprime)-d_i/2.*exp(-(d_i*t+d_j*tprime)))
class dh_dt(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
return (sign_val*exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- sign_val*exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j))).diff(t)
class dh_dtprime(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
return (sign_val*exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- sign_val*exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j))).diff(tprime)
class h(Function):
nargs = 5
def fdiff(self, argindex=5):
t, tprime, d_i, d_j, l = self.args
if argindex == 1:
return dh_dt(t, tprime, d_i, d_j, l)
elif argindex == 2:
return dh_dtprime(t, tprime, d_i, d_j, l)
elif argindex == 3:
return dh_dd_i(t, tprime, d_i, d_j, l)
elif argindex == 4:
return dh_dd_j(t, tprime, d_i, d_j, l)
elif argindex == 5:
return dh_dl(t, tprime, d_i, d_j, l)
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
return (sign_val*exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- sign_val*exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j)))
# return (exp((d_j/2.*l)**2)/(d_i+d_j)
# *(exp(-d_j*(tprime - t))
# *(erf((tprime-t)/l - d_j/2.*l)
# + erf(t/l + d_j/2.*l))
# - exp(-(d_j*tprime + d_i))
# *(erf(tprime/l - d_j/2.*l)
# + erf(d_j/2.*l))))