Merge remote-tracking branch 'upstream/devel' into devel

Conflicts:
	GPy/kern/__init__.py
commit e115778d74
Author: tjhgit
Date:   2015-03-08 09:42:33 +01:00

36 changed files with 2032 additions and 196 deletions


@@ -124,6 +124,7 @@ class GP(Model):
         else:
             self.X = ObsAr(X)
         self.update_model(True)
+        self._trigger_params_changed()

     def set_X(self,X):
         """


@@ -154,7 +154,7 @@ class Model(Parameterized):
         """
         return -(self._log_likelihood_gradients() + self._log_prior_gradients())

-    def _objective_grads(self, x):
+    def _grads(self, x):
         """
         Gets the gradients from the likelihood and the priors.
@@ -200,7 +200,7 @@ class Model(Parameterized):
             return np.inf
         return obj

-    def _objective_and_grads(self, x):
+    def _objective_grads(self, x):
         try:
             self.optimizer_array = x
             obj_f, self.obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
@@ -213,7 +213,7 @@ class Model(Parameterized):
             self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
         return obj_f, self.obj_grads

-    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=False, **kwargs):
+    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
@@ -255,9 +255,10 @@ class Model(Parameterized):
         else:
             optimizer = optimization.get_optimizer(optimizer)

         opt = optimizer(start, model=self, max_iters=max_iters, **kwargs)
-        with VerboseOptimization(self, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook):
-            opt.run(f_fp=self._objective_and_grads, f=self._objective, fp=self._objective_grads)
+        with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo:
+            opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
+            vo.finish(opt)

         self.optimization_runs.append(opt)
@@ -314,7 +315,7 @@ class Model(Parameterized):
         # evaulate around the point x
         f1 = self._objective(x + dx)
         f2 = self._objective(x - dx)
-        gradient = self._objective_grads(x)
+        gradient = self._grads(x)

         dx = dx[transformed_index]
         gradient = gradient[transformed_index]
@@ -360,7 +361,7 @@ class Model(Parameterized):
             print "No free parameters to check"
             return

-        gradient = self._objective_grads(x).copy()
+        gradient = self._grads(x).copy()
         np.where(gradient == 0, 1e-312, gradient)
         ret = True
         for nind, xind in itertools.izip(param_index, transformed_index):
@@ -401,12 +402,14 @@ class Model(Parameterized):
         model_details = [['<b>Model</b>', self.name + '<br>'],
                          ['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
                          ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
-                         ["<b>Updates</b>", '{}<br>'.format(self._updates)],
+                         ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                          ]
         from operator import itemgetter
         to_print = ["""<style type="text/css">
 .pd{
-    font-family:"Courier New", Courier, monospace !important;
+    font-family: "Courier New", Courier, monospace !important;
+    width: 100%;
+    padding: 3px;
 }
 </style>\n"""] + ["<p class=pd>"] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["</p>"]
         to_print.append(super(Model, self)._repr_html_())
@@ -416,7 +419,7 @@ class Model(Parameterized):
         model_details = [['Name', self.name],
                         ['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
                         ["Number of Parameters", '{}'.format(self.size)],
-                        ["Updates", '{}'.format(self._updates)],
+                        ["Updates", '{}'.format(self._update_on)],
                         ]
         from operator import itemgetter
         max_len = reduce(lambda a, b: max(len(b[0]), a), model_details, 0)
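
The net effect of these Model changes is easiest to see from the public optimize() call. A minimal usage sketch (toy data, illustrative names; GPy's Python 2-era API assumed):

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = np.sin(X) + 0.05*np.random.randn(20, 1)
    m = GPy.models.GPRegression(X, Y)

    # messages=True routes progress through VerboseOptimization, which
    # now receives the optimizer object and can report its final status;
    # ipython_notebook defaults to True and falls back to console output.
    m.optimize(optimizer='scg', messages=True, max_iters=200)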


@@ -14,6 +14,10 @@ class Observable(object):
         super(Observable, self).__init__()
         from lists_and_dicts import ObserverList
         self.observers = ObserverList()
+        self._update_on = True
+
+    def set_updates(self, on=True):
+        self._update_on = on

     def add_observer(self, observer, callble, priority=0):
         """
@@ -51,15 +55,16 @@ class Observable(object):
         :param min_priority: only notify observers with priority > min_priority
                              if min_priority is None, notify all observers in order
         """
-        if which is None:
-            which = self
-        if min_priority is None:
-            [callble(self, which=which) for _, _, callble in self.observers]
-        else:
-            for p, _, callble in self.observers:
-                if p <= min_priority:
-                    break
-                callble(self, which=which)
+        if self._update_on:
+            if which is None:
+                which = self
+            if min_priority is None:
+                [callble(self, which=which) for _, _, callble in self.observers]
+            else:
+                for p, _, callble in self.observers:
+                    if p <= min_priority:
+                        break
+                    callble(self, which=which)

     def change_priority(self, observer, callble, priority):
         self.remove_observer(observer, callble)


@@ -84,6 +84,7 @@ class Param(Parameterizable, ObsAr):
         self._original_ = getattr(obj, '_original_', None)
         self._name = getattr(obj, '_name', None)
         self._gradient_array_ = getattr(obj, '_gradient_array_', None)
+        self._update_on = getattr(obj, '_update_on', None)
         self.constraints = getattr(obj, 'constraints', None)
         self.priors = getattr(obj, 'priors', None)
@@ -273,7 +274,7 @@ class Param(Parameterizable, ObsAr):
         header = header_format.format(x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
         if not ties: ties = itertools.cycle([''])
         return "\n".join(["""<style type="text/css">
-.tg {padding:2px 3px;word-break:normal;border-collapse:collapse;border-spacing:0;border-color:#DCDCDC;margin:0px auto;}
+.tg {padding:2px 3px;word-break:normal;border-collapse:collapse;border-spacing:0;border-color:#DCDCDC;margin:0px auto;width:100%;}
 .tg td{font-family:"Courier New", Courier, monospace !important;font-weight:bold;color:#444;background-color:#F7FDFA;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg th{font-family:"Courier New", Courier, monospace !important;font-weight:normal;color:#fff;background-color:#26ADE4;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg .tg-left{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:left;}
@@ -360,7 +361,7 @@ class ParamConcatenation(object):
     #===========================================================================
     def update_all_params(self):
         for par in self.parents:
-            par.notify_observers()
+            par.trigger_update(trigger_parent=False)

     def constrain(self, constraint, warning=True):
         [param.constrain(constraint, trigger_parent=False) for param in self.params]


@@ -471,7 +471,7 @@ class Indexable(Nameable, Updateable):
             self.param_array[...] = transform.initialize(self.param_array)
         reconstrained = self.unconstrain()
         added = self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
-        self.notify_observers(self, None if trigger_parent else -np.inf)
+        self.trigger_update(trigger_parent)
         return added

     def unconstrain(self, *transforms):
@@ -1042,6 +1042,9 @@ class Parameterizable(OptimizationHandlable):
                 p = param_to_array(p)
                 d = f.create_dataset(n,p.shape,dtype=p.dtype)
                 d[:] = p
+            if hasattr(self, 'param_array'):
+                d = f.create_dataset('param_array',self.param_array.shape, dtype=self.param_array.dtype)
+                d[:] = self.param_array
             f.close()
         except:
             raise 'Fails to write the parameters into a HDF5 file!'


@@ -156,7 +156,7 @@ class Parameterized(Parameterizable):
             p._parent_index_ += 1
         self.parameters.insert(index, param)

-        param.add_observer(self, self._pass_through_notify_observers, -1000)
+        param.add_observer(self, self._pass_through_notify_observers, -np.inf)

         parent = self
         while parent is not None:
@@ -296,7 +296,7 @@ class Parameterized(Parameterizable):
                 self.param_array[name] = value
             except:
                 raise ValueError, "Setting by slice or index only allowed with array-like"
-            self._trigger_params_changed()
+            self.trigger_update()
         else:
             try: param = self.__getitem__(name, paramlist)
             except: raise
@@ -393,7 +393,7 @@ class Parameterized(Parameterizable):
         </tr>""".format(name=name)
         to_print.insert(0, header)
         style = """<style type="text/css">
-.tg {font-family:"Courier New", Courier, monospace !important;padding:2px 3px;word-break:normal;border-collapse:collapse;border-spacing:0;border-color:#DCDCDC;margin:0px auto;}
+.tg {font-family:"Courier New", Courier, monospace !important;padding:2px 3px;word-break:normal;border-collapse:collapse;border-spacing:0;border-color:#DCDCDC;margin:0px auto;width:100%;}
 .tg td{font-family:"Courier New", Courier, monospace !important;font-weight:bold;color:#444;background-color:#F7FDFA;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg th{font-family:"Courier New", Courier, monospace !important;font-weight:normal;color:#fff;background-color:#26ADE4;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg .tg-left{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:left;}


@@ -549,7 +549,8 @@ class DGPLVM(Prior):
         M_i = np.zeros((self.classnum, self.dim))
         for i in cls:
             # Mean of each class
-            M_i[i] = np.mean(cls[i], axis=0)
+            class_i = cls[i]
+            M_i[i] = np.mean(class_i, axis=0)
         return M_i

     # Adding data points as tuple to the dictionary so that we can access indices
@@ -661,7 +662,8 @@ class DGPLVM(Prior):
         Sw = self.compute_Sw(cls, M_i)
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
         return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))

     # This function calculates derivative of the log of prior function
@@ -680,8 +682,9 @@ class DGPLVM(Prior):
         # Calculating inverse of Sb and its transpose and minus
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
-        # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
         Sb_inv_N_trans = np.transpose(Sb_inv_N)
         Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
         Sw_trans = np.transpose(Sw)
@@ -706,7 +709,230 @@ class DGPLVM(Prior):
         return np.random.rand(n) # A WRONG implementation

     def __str__(self):
-        return 'DGPLVM_prior'
+        return 'DGPLVM_prior_Raq'
+
+class DGPLVM_T(Prior):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
+
+    :param sigma2: constant
+
+    .. Note:: DGPLVM for Classification paper implementation
+    """
+    domain = _REAL
+    # _instances = []
+    # def __new__(cls, mu, sigma): # Singleton:
+    #     if cls._instances:
+    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
+    #         for instance in cls._instances:
+    #             if instance().mu == mu and instance().sigma == sigma:
+    #                 return instance()
+    #     o = super(Prior, cls).__new__(cls, mu, sigma)
+    #     cls._instances.append(weakref.ref(o))
+    #     return cls._instances[-1]()
+
+    def __init__(self, sigma2, lbl, x_shape, vec):
+        self.sigma2 = sigma2
+        # self.x = x
+        self.lbl = lbl
+        self.classnum = lbl.shape[1]
+        self.datanum = lbl.shape[0]
+        self.x_shape = x_shape
+        self.dim = x_shape[1]
+        self.vec = vec
+
+    def get_class_label(self, y):
+        for idx, v in enumerate(y):
+            if v == 1:
+                return idx
+        return -1
+
+    # This function assigns each data point to its own class
+    # and returns the dictionary which contains the class name and parameters.
+    def compute_cls(self, x):
+        cls = {}
+        # Appending each data point to its proper class
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in cls:
+                cls[class_label] = []
+            cls[class_label].append(x[j])
+        return cls
+
+    # This function computes the mean of each class. The mean is calculated through each dimension
+    def compute_Mi(self, cls, vec):
+        M_i = np.zeros((self.classnum, self.dim))
+        for i in cls:
+            # Mean of each class
+            class_i = np.multiply(cls[i], vec)
+            M_i[i] = np.mean(class_i, axis=0)
+        return M_i
+
+    # Adding data points as tuple to the dictionary so that we can access indices
+    def compute_indices(self, x):
+        data_idx = {}
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in data_idx:
+                data_idx[class_label] = []
+            t = (j, x[j])
+            data_idx[class_label].append(t)
+        return data_idx
+
+    # Adding indices to the list so we can access all the indices
+    def compute_listIndices(self, data_idx):
+        lst_idx = []
+        lst_idx_all = []
+        for i in data_idx:
+            if len(lst_idx) == 0:
+                pass
+                # Do nothing: the first time round the list is freshly created and empty
+            else:
+                lst_idx = []
+            # Here we put the indices of each class into the list called lst_idx_all
+            for m in xrange(len(data_idx[i])):
+                lst_idx.append(data_idx[i][m][0])
+            lst_idx_all.append(lst_idx)
+        return lst_idx_all
+
+    # This function calculates the between-class variance
+    def compute_Sb(self, cls, M_i, M_0):
+        Sb = np.zeros((self.dim, self.dim))
+        for i in cls:
+            B = (M_i[i] - M_0).reshape(self.dim, 1)
+            B_trans = B.transpose()
+            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
+        return Sb
+
+    # This function calculates the within-class variance
+    def compute_Sw(self, cls, M_i):
+        Sw = np.zeros((self.dim, self.dim))
+        for i in cls:
+            N_i = float(len(cls[i]))
+            W_WT = np.zeros((self.dim, self.dim))
+            for xk in cls[i]:
+                W = (xk - M_i[i])
+                W_WT += np.outer(W, W)
+            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
+        return Sw
+
+    # Calculating beta and Bi for Sb
+    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
+        # import pdb
+        # pdb.set_trace()
+        B_i = np.zeros((self.classnum, self.dim))
+        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            # pdb.set_trace()
+            # Calculating Bi
+            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
+        for k in xrange(self.datanum):
+            for i in data_idx:
+                N_i = float(len(data_idx[i]))
+                if k in lst_idx_all[i]:
+                    beta = (float(1) / N_i) - (float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+                else:
+                    beta = -(float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
+        return Sig_beta_B_i_all
+
+    # Calculating the W_j s separately so we can access all of them at any time
+    def compute_wj(self, data_idx, M_i):
+        W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                xj = tpl[1]
+                j = tpl[0]
+                W_i[j] = (xj - M_i[i])
+        return W_i
+
+    # Calculating alpha and Wj for Sw
+    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
+        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                k = tpl[0]
+                for j in lst_idx_all[i]:
+                    if k == j:
+                        alpha = 1 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+                    else:
+                        alpha = 0 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
+        return Sig_alpha_W_i
+
+    # This function calculates the log of our prior
+    def lnpdf(self, x):
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls, self.vec)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #print 'SB_inv: ', Sb_inv_N
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
+        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
+
+    # This function calculates the derivative of the log prior
+    def lnpdf_grad(self, x):
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls, self.vec)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        data_idx = self.compute_indices(x)
+        lst_idx_all = self.compute_listIndices(data_idx)
+        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
+        W_i = self.compute_wj(data_idx, M_i)
+        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
+
+        # Calculating inverse of Sb and its transpose and minus
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #print 'SB_inv: ',Sb_inv_N
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
+        Sb_inv_N_trans = np.transpose(Sb_inv_N)
+        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
+        Sw_trans = np.transpose(Sw)
+
+        # Calculating DJ/DXk
+        DJ_Dxk = 2 * (
+            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
+                Sig_alpha_W_i))
+        # Calculating derivative of the log of the prior
+        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
+        return DPx_Dx.T
+
+    # def frb(self, x):
+    #     from functools import partial
+    #     from GPy.models import GradientChecker
+    #     f = partial(self.lnpdf)
+    #     df = partial(self.lnpdf_grad)
+    #     grad = GradientChecker(f, df, x, 'X')
+    #     grad.checkgrad(verbose=1)
+
+    def rvs(self, n):
+        return np.random.rand(n) # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior_Raq_TTT'
+
 class HalfT(Prior):
     """

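Both DGPLVM priors above score a latent space by the ratio-of-scatter criterion tr(Sb^-1 Sw). A standalone sketch of the between-/within-class scatter computation that compute_Sb and compute_Sw implement (toy data, illustrative names):

    import numpy as np

    X = np.random.randn(12, 2)                 # latent points
    labels = np.array([0]*6 + [1]*6)           # two classes
    n, d = X.shape

    M0 = X.mean(axis=0)
    Sb = np.zeros((d, d)); Sw = np.zeros((d, d))
    for c in np.unique(labels):
        Xc = X[labels == c]
        Mc = Xc.mean(axis=0)
        B = (Mc - M0)[:, None]
        Sb += (len(Xc)/float(n)) * B.dot(B.T)  # between-class scatter
        Wc = Xc - Mc
        Sw += (1.0/n) * Wc.T.dot(Wc)           # within-class scatter

    # lnpdf is proportional to -trace(inv(Sb + 0.1*I).dot(Sw)) / sigma2
    J = np.trace(np.linalg.inv(Sb + 0.1*np.eye(d)).dot(Sw))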

@@ -79,8 +79,10 @@ class Logexp(Transformation):

 class NormalTheta(Transformation):
+    "Do not use, not officially supported!"
     _instances = []
-    def __new__(cls, mu_indices, var_indices):
+    def __new__(cls, mu_indices=None, var_indices=None):
+        "Do not use, not officially supported!"
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -143,9 +145,10 @@ class NormalTheta(Transformation):
         self.var_indices = state[1]

 class NormalNaturalAntti(NormalTheta):
+    "Do not use, not officially supported!"
     _instances = []
-    _logexp = Logexp()
-    def __new__(cls, mu_indices, var_indices):
+    def __new__(cls, mu_indices=None, var_indices=None):
+        "Do not use, not officially supported!"
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -182,8 +185,10 @@ class NormalNaturalAntti(NormalTheta):
         return "natantti"

 class NormalEta(Transformation):
+    "Do not use, not officially supported!"
     _instances = []
-    def __new__(cls, mu_indices, var_indices):
+    def __new__(cls, mu_indices=None, var_indices=None):
+        "Do not use, not officially supported!"
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -223,8 +228,10 @@ class NormalEta(Transformation):
         return "eta"

 class NormalNaturalThroughTheta(NormalTheta):
+    "Do not use, not officially supported!"
     _instances = []
-    def __new__(cls, mu_indices, var_indices):
+    def __new__(cls, mu_indices=None, var_indices=None):
+        "Do not use, not officially supported!"
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -272,8 +279,10 @@ class NormalNaturalThroughTheta(NormalTheta):

 class NormalNaturalWhooot(NormalTheta):
+    "Do not use, not officially supported!"
     _instances = []
-    def __new__(cls, mu_indices, var_indices):
+    def __new__(cls, mu_indices=None, var_indices=None):
+        "Do not use, not officially supported!"
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -307,8 +316,10 @@ class NormalNaturalWhooot(NormalTheta):
         return "natgrad"

 class NormalNaturalThroughEta(NormalEta):
+    "Do not use, not officially supported!"
     _instances = []
-    def __new__(cls, mu_indices, var_indices):
+    def __new__(cls, mu_indices=None, var_indices=None):
+        "Do not use, not officially supported!"
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:


@@ -11,7 +11,6 @@ class Updateable(Observable):
     A model can be updated or not.
     Make sure updates can be switched on and off.
     """
-    _updates = True

     def __init__(self, *args, **kwargs):
         super(Updateable, self).__init__(*args, **kwargs)
@@ -27,18 +26,18 @@
             None: get the current update state
         """
         if updates is None:
-            p = getattr(self, '_highest_parent_', None)
-            if p is not None:
-                self._updates = p._updates
-            return self._updates
+            return self._update_on
         assert isinstance(updates, bool), "updates are either on (True) or off (False)"
         p = getattr(self, '_highest_parent_', None)
-        if p is not None:
-            p._updates = updates
-        self._updates = updates
+        def turn_updates(s):
+            s._update_on = updates
+        p.traverse(turn_updates)
         self.trigger_update()

     def toggle_update(self):
+        print "deprecated: toggle_update was renamed to update_toggle for easier access"
+        self.update_toggle()
+
+    def update_toggle(self):
         self.update_model(not self.update_model())

     def trigger_update(self, trigger_parent=True):
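
With updates now stored per object (_update_on) and propagated via traverse, switching a model's updates off, editing several parameters, and switching them back on recomputes the model only once. A minimal sketch (model and parameter names are illustrative):

    import numpy as np
    import GPy

    X, Y = np.random.rand(50, 1), np.random.rand(50, 1)
    m = GPy.models.GPRegression(X, Y)

    m.update_model(False)          # switch recomputation off
    m.kern.lengthscale = 0.5       # several cheap parameter edits...
    m.kern.variance = 2.0
    m.likelihood.variance = 0.01
    m.update_model(True)           # ...one single recompute here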


@@ -6,7 +6,8 @@ from gp import GP
 from parameterization.param import Param
 from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
-from parameterization.variational import VariationalPosterior
+from parameterization.variational import VariationalPosterior, NormalPosterior
+from ..util.linalg import mdot

 import logging
 from GPy.inference.latent_function_inference.posterior import Posterior
@@ -102,7 +103,15 @@ class SparseGP(GP):
     def _raw_predict(self, Xnew, full_cov=False, kern=None):
         """
-        Make a prediction for the latent function values
+        Make a prediction for the latent function values.
+
+        For certain inputs we give back a full_cov of shape NxN;
+        if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is off,
+        we take only the diagonal elements across N.
+
+        For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
+        return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
+        This is for both with and without missing data.
         """
         if kern is None: kern = self.kern
@@ -121,15 +130,32 @@
             Kxx = kern.Kdiag(Xnew)
             var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
         else:
-            Kx = kern.psi1(self.Z, Xnew).T
-            mu = np.dot(Kx.T, self.posterior.woodbury_vector)
-            if full_cov:
-                Kxx = kern.K(Xnew.mean)
-                if self.posterior.woodbury_inv.ndim == 2:
-                    var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
-                elif self.posterior.woodbury_inv.ndim == 3:
-                    var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
-            else:
-                Kxx = kern.psi0(self.Z, Xnew)
-                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
+            psi0_star = self.kern.psi0(self.Z, Xnew)
+            psi1_star = self.kern.psi1(self.Z, Xnew)
+            #psi2_star = self.kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
+            la = self.posterior.woodbury_vector
+            mu = np.dot(psi1_star, la) # TODO: dimensions?
+
+            if full_cov:
+                var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
+                di = np.diag_indices(la.shape[1])
+            else:
+                var = np.empty((Xnew.shape[0], la.shape[1]))
+            for i in range(Xnew.shape[0]):
+                _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
+                psi2_star = self.kern.psi2(self.Z, NormalPosterior(_mu, _var))
+                tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
+                var_ = mdot(la.T, tmp, la)
+                p0 = psi0_star[i]
+                t = np.atleast_3d(self.posterior.woodbury_inv)
+                t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
+                if full_cov:
+                    var_[di] += p0
+                    var_[di] += -t2
+                    var[i] = var_
+                else:
+                    var[i] = np.diag(var_)+p0-t2
         return mu, var
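
A sketch of the shape contract the new uncertain-input branch promises, assuming a fitted model m with D output dimensions and N uncertain test inputs wrapped in a variational posterior (all names illustrative; _raw_predict is the internal API behind predict()):

    mu, var = m._raw_predict(Xnew_uncertain, full_cov=False)
    assert mu.shape == (N, D) and var.shape == (N, D)     # per-output variances

    mu, var = m._raw_predict(Xnew_uncertain, full_cov=True)
    assert var.shape == (N, D, D)                         # one DxD covariance per test point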


@@ -25,23 +25,18 @@ class SVGP(SparseGP):
         Hensman, Matthews and Ghahramani, Scalable Variational GP Classification, ArXiv 1411.2005
         """
-        if batchsize is None:
-            batchsize = X.shape[0]
-        self.X_all, self.Y_all = X, Y
-        # how to rescale the batch likelihood in case of minibatches
         self.batchsize = batchsize
-        batch_scale = float(self.X_all.shape[0])/float(self.batchsize)
-        #KL_scale = 1./np.float64(self.mpi_comm.size)
-        KL_scale = 1.0
-
-        import climin.util
-        #Make a climin slicer to make drawing minibatches much quicker
-        self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
-        X_batch, Y_batch = self.new_batch()
+        self.X_all, self.Y_all = X, Y
+        if batchsize is None:
+            X_batch, Y_batch = X, Y
+        else:
+            import climin.util
+            #Make a climin slicer to make drawing minibatches much quicker
+            self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
+            X_batch, Y_batch = self.new_batch()

         #create the SVI inference method
-        inf_method = svgp_inf(KL_scale=KL_scale, batch_scale=batch_scale)
+        inf_method = svgp_inf()
         SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
                           name=name, Y_metadata=Y_metadata, normalizer=False)
@@ -53,7 +48,7 @@ class SVGP(SparseGP):
         self.link_parameter(self.m)

     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))

         #update the kernel gradients
         self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
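
The rescaling that used to be fixed at construction time is now recomputed on every parameters_changed() call as batch_scale = N_total/N_batch, which keeps the minibatch likelihood term an unbiased estimate of the full-data term. A construction sketch (the class location is assumed to be GPy.core.SVGP in this revision; climin is needed for minibatching):

    import numpy as np
    import GPy

    X = np.random.rand(1000, 1)
    Y = np.where(np.sin(6*X) + 0.3*np.random.randn(1000, 1) > 0, 1., 0.)
    Z = np.random.rand(20, 1)

    # batchsize=100 -> every inference call rescales the minibatch
    # likelihood by 1000/100 = 10; batchsize=None now means "all data".
    m = GPy.core.SVGP(X, Y, Z, GPy.kern.RBF(1), GPy.likelihoods.Bernoulli(),
                      batchsize=100)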


@@ -4,48 +4,90 @@

 import numpy as np
 import sys
+import time

 def exponents(fnow, current_grad):
     exps = [np.abs(np.float(fnow)), current_grad]
     return np.sign(exps) * np.log10(exps).astype(int)

 class VerboseOptimization(object):
-    def __init__(self, model, maxiters, verbose=True, current_iteration=0, ipython_notebook=False):
+    def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True):
         self.verbose = verbose
         if self.verbose:
             self.model = model
             self.iteration = current_iteration
-            self.ipython_notebook = ipython_notebook
             self.p_iter = self.iteration
             self.maxiters = maxiters
             self.len_maxiters = len(str(maxiters))
+            self.opt_name = opt.opt_name

             self.model.add_observer(self, self.print_status)
+            self.status = 'running'
             self.update()

-            if self.ipython_notebook:
+            try:
                 from IPython.display import display
-                from IPython.html.widgets import IntProgressWidget, HTMLWidget
+                from IPython.html.widgets import FloatProgressWidget, HTMLWidget, ContainerWidget
                 self.text = HTMLWidget()
-                display(self.text)
-                self.progress = IntProgressWidget()
-                display(self.progress)
+                self.progress = FloatProgressWidget()
+                self.model_show = HTMLWidget()
+                self.ipython_notebook = ipython_notebook
+            except:
+                # Not in Ipython notebook
+                self.ipython_notebook = False
+
+            if self.ipython_notebook:
+                self.text.set_css('width', '100%')
+                #self.progress.set_css('width', '100%')
+
+                left_col = ContainerWidget(children = [self.progress, self.text])
+                right_col = ContainerWidget(children = [self.model_show])
+                hor_align = ContainerWidget(children = [left_col, right_col])
+
+                display(hor_align)
+
+                left_col.set_css({
+                    'padding': '2px',
+                    'width': "100%",
+                })
+
+                right_col.set_css({
+                    'padding': '2px',
+                })
+
+                hor_align.set_css({
+                    'width': "100%",
+                })
+
+                hor_align.remove_class('vbox')
+                hor_align.add_class('hbox')
+
+                left_col.add_class("box-flex1")
+                right_col.add_class('box-flex0')
+
+                #self.text.add_class('box-flex2')
+                #self.progress.add_class('box-flex1')
             else:
                 self.exps = exponents(self.fnow, self.current_gradient)
-                print ' {0:{mi}s} {1:11s} {2:11s}'.format("I", "F", "|g|", mi=self.len_maxiters)
+                print 'Running {} Code:'.format(self.opt_name)
+                print ' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters)

     def __enter__(self):
+        self.start = time.time()
         return self

     def print_out(self):
         if self.ipython_notebook:
-            names_vals = [['Iteration', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
-                          ['f', "{: > 05.3E}".format(self.fnow)],
-                          ['||Gradient||', "{: >+05.3E}".format(float(self.current_gradient))],
+            names_vals = [['optimizer', "{:s}".format(self.opt_name)],
+                          ['runtime [s]', "{:> g}".format(time.time()-self.start)],
+                          ['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
+                          ['objective', "{: > 12.3E}".format(self.fnow)],
+                          ['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],
+                          ['status', "{:s}".format(self.status)],
                           ]
             #message = "Lik:{:5.3E} Grad:{:5.3E} Lik:{:5.3E} Len:{!s}".format(float(m.log_likelihood()), np.einsum('i,i->', grads, grads), float(m.likelihood.variance), " ".join(["{:3.2E}".format(l) for l in m.kern.lengthscale.values]))
             html_begin = """<style type="text/css">
-.tg-opt {font-family:"Courier New", Courier, monospace !important;padding:2px 3px;word-break:normal;border-collapse:collapse;border-spacing:0;border-color:#DCDCDC;margin:0px auto;}
+.tg-opt {font-family:"Courier New", Courier, monospace !important;padding:2px 3px;word-break:normal;border-collapse:collapse;border-spacing:0;border-color:#DCDCDC;margin:0px auto;width:100%;}
 .tg-opt td{font-family:"Courier New", Courier, monospace !important;font-weight:bold;color:#444;background-color:#F7FDFA;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg-opt th{font-family:"Courier New", Courier, monospace !important;font-weight:normal;color:#fff;background-color:#26ADE4;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg-opt .tg-left{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:left;}
@@ -61,6 +103,7 @@ class VerboseOptimization(object):
             html_body += "</tr>"
             self.text.value = html_begin + html_body + html_end
             self.progress.value = 100*(self.iteration+1)/self.maxiters
+            self.model_show.value = self.model._repr_html_()
         else:
             n_exps = exponents(self.fnow, self.current_gradient)
             if self.iteration - self.p_iter >= 20 * np.random.rand():
@@ -72,7 +115,7 @@ class VerboseOptimization(object):
                 if b:
                     self.exps = n_exps
                 print '\r',
-                print '{0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), mi=self.len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
+                print '{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
                 sys.stdout.flush()

     def print_status(self, me, which=None):
@@ -91,6 +134,17 @@ class VerboseOptimization(object):
         else:
             self.current_gradient = np.nan

+    def finish(self, opt):
+        self.status = opt.status
+
     def __exit__(self, type, value, traceback):
         if self.verbose:
-            self.model.remove_observer(self)
+            self.stop = time.time()
+            self.model.remove_observer(self)
+            self.print_out()
+            if not self.ipython_notebook:
+                print ''
+                print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)
+                print 'Optimization status: {0:.5g}'.format(self.status)
+                print


@@ -1,2 +1,3 @@
 import latent_function_inference
 import optimization
+import mcmc


@@ -5,11 +5,8 @@ import numpy as np
 from posterior import Posterior

 class SVGP(LatentFunctionInference):
-    def __init__(self, KL_scale=1., batch_scale=1.):
-        self.KL_scale = KL_scale
-        self.batch_scale = batch_scale
-
-    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None):
+    def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
         num_inducing = Z.shape[0]
         num_data, num_outputs = Y.shape
@@ -44,12 +41,9 @@ class SVGP(LatentFunctionInference):
         dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
         dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)

-        KL_scale = self.KL_scale
-        batch_scale = self.batch_scale
-        KL, dKL_dKmm, dKL_dS, dKL_dm = KL_scale*KL, KL_scale*dKL_dKmm, KL_scale*dKL_dS, KL_scale*dKL_dm
-
         #quadrature for the likelihood
-        F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v)
+        F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)

         #rescale the F term if working on a batch
         F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale


@@ -21,7 +21,7 @@ class VarDTC(LatentFunctionInference):
     For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
     """
-    const_jitter = 1e-6
+    const_jitter = 1e-8

     def __init__(self, limit=1):
         #self._YYTfactor_cache = caching.cache()
         from ...util.caching import Cacher


@@ -24,7 +24,7 @@ class VarDTC_minibatch(LatentFunctionInference):
     For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
     """
-    const_jitter = 1e-6
+    const_jitter = 1e-8

     def __init__(self, batchsize=None, limit=1, mpi_comm=None):
         self.batchsize = batchsize


@@ -1,4 +1,4 @@
-# ## Copyright (c) 2014, Zhenwen Dai
+# ## Copyright (c) 2014 Mu Niu, Zhenwen Dai and GPy Authors
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np


@@ -37,7 +37,7 @@ class Optimizer():
         self.x_opt = None
         self.funct_eval = None
         self.status = None
-        self.max_f_eval = int(max_f_eval)
+        self.max_f_eval = int(max_iters)
         self.max_iters = int(max_iters)
         self.bfgs_factor = bfgs_factor
         self.trace = None


@@ -61,6 +61,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
     function_eval = 1
     fnow = fold
     gradnew = gradf(x, *optargs) # Initial gradient.
+    function_eval += 1
     #if any(np.isnan(gradnew)):
     #    raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
     current_grad = np.dot(gradnew, gradnew)
@@ -96,6 +97,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
             sigma = sigma0 / np.sqrt(kappa)
             xplus = x + sigma * d
             gplus = gradf(xplus, *optargs)
+            function_eval += 1
             theta = np.dot(d, (gplus - gradnew)) / sigma

         # Increase effective curvature and evaluate step size alpha.
@@ -111,10 +113,10 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
         fnew = f(xnew, *optargs)
         function_eval += 1

-        # if function_eval >= max_f_eval:
-        #     status = "maximum number of function evaluations exceeded"
-        #     break
-        #     return x, flog, function_eval, status
+        if function_eval >= max_f_eval:
+            status = "maximum number of function evaluations exceeded"
+            break
+            return x, flog, function_eval, status

         Delta = 2.*(fnew - fold) / (alpha * mu)
         if Delta >= 0.:
# Update variables for new position # Update variables for new position
gradold = gradnew gradold = gradnew
gradnew = gradf(x, *optargs) gradnew = gradf(x, *optargs)
function_eval += 1
current_grad = np.dot(gradnew, gradnew) current_grad = np.dot(gradnew, gradnew)
fold = fnew fold = fnew
# If the gradient is zero then we are done. # If the gradient is zero then we are done.
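
Since function_eval now also counts gradient calls and the max_f_eval guard is active, SCG can terminate on an evaluation budget. A direct-call sketch on a toy quadratic (import path assumed for this revision; the return layout follows the uncommented lines above):

    import numpy as np
    from GPy.inference.optimization.scg import SCG

    f = lambda x: np.sum((x - 3.)**2)      # toy objective
    gradf = lambda x: 2.*(x - 3.)          # its gradient

    x_opt, flog, n_eval, status = SCG(f, gradf, np.zeros(2),
                                      maxiters=500, max_f_eval=50, display=False)
    # n_eval includes gradient evaluations, so the cap triggers sooner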


@@ -1,7 +1,7 @@
 from _src.kern import Kern
 from _src.rbf import RBF
 from _src.linear import Linear, LinearFull
-from _src.static import Bias, White
+from _src.static import Bias, White, Fixed
 from _src.brownian import Brownian
 from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from _src.mlp import MLP
@@ -13,7 +13,11 @@ from _src.ODE_UYC import ODE_UYC
 from _src.ODE_st import ODE_st
 from _src.ODE_t import ODE_t
 from _src.poly import Poly
+<<<<<<< HEAD
 from _src.spline import Spline
+=======
+from _src.eq_ode2 import EQ_ODE2
+>>>>>>> upstream/devel
 from _src.trunclinear import TruncLinear,TruncLinear_inf
 from _src.splitKern import SplitKern,DiffGenomeKern


@@ -127,7 +127,7 @@ class Coregionalize(Kern):
             config.set('weave', 'working', 'False')
             dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
         else:
-            dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
+            dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
@@ -154,9 +154,9 @@ class Coregionalize(Kern):
     def _gradient_reduce_numpy(self, dL_dK, index, index2):
         index, index2 = index[:,0], index2[:,0]
         dL_dK_small = np.zeros_like(self.B)
-        for i in range(k.output_dim):
+        for i in range(self.output_dim):
             tmp1 = dL_dK[index==i]
-            for j in range(k.output_dim):
+            for j in range(self.output_dim):
                 dL_dK_small[j,i] = tmp1[:,index2==j].sum()
         return dL_dK_small

GPy/kern/_src/eq_ode2.py (new file, 1331 lines)
(file diff suppressed because it is too large)


@@ -42,25 +42,41 @@ class Prod(CombinationKernel):
         return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))

     def update_gradients_full(self, dL_dK, X, X2=None):
-        k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            p.update_gradients_full(k/p.K(X,X2),X,X2)
+        if len(self.parts)==2:
+            self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
+            self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
+        else:
+            k = self.K(X,X2)*dL_dK
+            for p in self.parts:
+                p.update_gradients_full(k/p.K(X,X2),X,X2)

     def update_gradients_diag(self, dL_dKdiag, X):
-        k = self.Kdiag(X)*dL_dKdiag
-        for p in self.parts:
-            p.update_gradients_diag(k/p.Kdiag(X),X)
+        if len(self.parts)==2:
+            self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
+            self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
+        else:
+            k = self.Kdiag(X)*dL_dKdiag
+            for p in self.parts:
+                p.update_gradients_diag(k/p.Kdiag(X),X)

     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
-        k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            target += p.gradients_X(k/p.K(X,X2),X,X2)
+        if len(self.parts)==2:
+            target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
+            target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
+        else:
+            k = self.K(X,X2)*dL_dK
+            for p in self.parts:
+                target += p.gradients_X(k/p.K(X,X2),X,X2)
         return target

     def gradients_X_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape)
-        k = self.Kdiag(X)*dL_dKdiag
-        for p in self.parts:
-            target += p.gradients_X_diag(k/p.Kdiag(X),X)
+        if len(self.parts)==2:
+            target += self.parts[0].gradients_X_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
+            target += self.parts[1].gradients_X_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
+        else:
+            k = self.Kdiag(X)*dL_dKdiag
+            for p in self.parts:
+                target += p.gradients_X_diag(k/p.Kdiag(X),X)
         return target
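
For two parts this applies the product rule directly, handing each part dL_dK times the other part's Gram matrix; the general fallback divides the product kernel by p.K(X,X2), which breaks down wherever a part's Gram matrix contains zeros. A quick sketch of the affected call (a Linear kernel can hit exact zeros when an input is 0):

    import numpy as np
    import GPy

    X = np.random.rand(10, 1)
    prod = GPy.kern.RBF(1) * GPy.kern.Linear(1)   # builds a Prod kernel

    # each of the two parts now receives dL_dK * (other part's K) exactly,
    # with no division by its own (possibly zero) entries
    prod.update_gradients_full(np.ones((10, 10)), X)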


@@ -1,4 +1,4 @@
-# Copyright (c) 2014, James Hensman
+# Copyright (c) 2015, Thomas Hornung
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
@@ -7,7 +7,12 @@ from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp

 class Spline(Kern):
     """
-    Spline kernel
+    Linear spline kernel. You need to specify 2 parameters: the variance and c.
+    The variance is defined in powers of 10. Thus specifying -2 means 10^-2.
+    The parameter c lets you set the stiffness of the spline fit; a very stiff
+    spline equals linear regression.
+    See https://www.youtube.com/watch?v=50Vgw11qn0o starting at minute 1:17:28
+    Lit: Wahba, 1990
     """

     def __init__(self, input_dim, variance=1., c=1., active_dims=None, name='spline'):
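
A usage sketch for the documented parameters (constructor signature taken from the diff; the powers-of-10 reading of variance follows the docstring above):

    import numpy as np
    import GPy

    X = np.linspace(0, 10, 40)[:, None]
    Y = np.sin(X) + 0.1*np.random.randn(40, 1)

    # variance=-2 would mean 10^-2; larger c stiffens the fit
    # towards plain linear regression.
    k = GPy.kern.Spline(input_dim=1, variance=1., c=1.)
    m = GPy.models.GPRegression(X, Y, kernel=k)
    m.optimize()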


@@ -6,3 +6,4 @@ from poisson import Poisson
 from student_t import StudentT
 from likelihood import Likelihood
 from mixed_noise import MixedNoise
+from binomial import Binomial


@@ -77,6 +77,32 @@ class Bernoulli(Likelihood):
         return Z_hat, mu_hat, sigma2_hat

+    def variational_expectations(self, Y, m, v, gh_points=None):
+        if isinstance(self.gp_link, link_functions.Probit):
+
+            if gh_points is None:
+                gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
+            else:
+                gh_x, gh_w = gh_points
+
+            from scipy import stats
+            shape = m.shape
+            m,v,Y = m.flatten(), v.flatten(), Y.flatten()
+            Ysign = np.where(Y==1,1,-1)
+            X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None]
+            p = stats.norm.cdf(X)
+            p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
+            N = stats.norm.pdf(X)
+            F = np.log(p).dot(gh_w)
+            NoverP = N/p
+            dF_dm = (NoverP*Ysign[:,None]).dot(gh_w)
+            dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w)
+            return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), None
+
+        else:
+            raise NotImplementedError
+
     def predictive_mean(self, mu, variance, Y_metadata=None):
         if isinstance(self.gp_link, link_functions.Probit):
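
The method evaluates E_q[log p(y|f)] under q(f) = N(m, v) by 20-point Gauss-Hermite quadrature, after substituting f = m*y + sqrt(2v)*x. A standalone sanity check of the same integral against Monte Carlo (pure numpy/scipy; the sqrt(pi) weight normalization is made explicit here):

    import numpy as np
    from scipy import stats

    m, v, y = 0.3, 0.5, 1.0
    gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
    gh_w = gh_w / np.sqrt(np.pi)   # normalize for an N(m, v) expectation

    F_quad = np.log(stats.norm.cdf(gh_x*np.sqrt(2.*v) + m*y)).dot(gh_w)

    f = m + np.sqrt(v)*np.random.randn(200000)
    F_mc = np.log(stats.norm.cdf(y*f)).mean()
    # F_quad and F_mc agree to roughly three decimal places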

GPy/likelihoods/binomial.py (new file, 125 lines)

@ -0,0 +1,125 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from scipy import special
class Binomial(Likelihood):
"""
Binomial likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
.. Note::
Y takes values in either {-1, 1} or {0, 1}.
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
.. See also::
likelihood.py, for the parent class
"""
def __init__(self, gp_link=None):
if gp_link is None:
gp_link = link_functions.Probit()
super(Binomial, self).__init__(gp_link, 'Binomial')
def conditional_mean(self, gp, Y_metadata):
return self.gp_link(gp)*Y_metadata['trials']
def pdf_link(self, inv_link_f, y, Y_metadata):
"""
Likelihood function given inverse link of f.
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata must contain 'trials'
:returns: likelihood evaluated for this point
:rtype: float
.. Note:
Each y_i must be in {0, 1}
"""
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata))
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Log Likelihood function given inverse link of f.
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata must contain 'trials'
:returns: log likelihood evaluated at points inverse link of f.
:rtype: float
"""
N = Y_metadata['trials']
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata must contain 'trials'
:returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array
"""
N = Y_metadata['trials']
return y/inv_link_f - (N-y)/(1-inv_link_f)
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in binomial
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
"""
N = Y_metadata['trials']
return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f)
    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        N = Y_metadata['trials']
        Ysim = np.random.binomial(N, self.gp_link.transf(gp))
        return Ysim.reshape(orig_shape)

    def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
        pass
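As a quick sanity check of the derivative formulas above, a minimal standalone sketch (not part of the commit; all names are local to the sketch) comparing the dlogpdf_dlink formula against a central finite difference of the log pdf:

    import numpy as np
    from scipy import special

    def binomial_logpdf(p, y, N):
        # log C(N,y) + y*log(p) + (N-y)*log(1-p), elementwise
        nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
        return nchoosey + y*np.log(p) + (N-y)*np.log(1.-p)

    y, N, p, eps = 3., 10., 0.4, 1e-6
    analytic = y/p - (N-y)/(1.-p)                     # the dlogpdf_dlink formula
    numeric = (binomial_logpdf(p+eps, y, N) - binomial_logpdf(p-eps, y, N)) / (2*eps)
    assert abs(analytic - numeric) < 1e-4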

View file

@@ -131,7 +131,7 @@ class Likelihood(Parameterized):
         return z, mean, variance

-    def variational_expectations(self, Y, m, v, gh_points=None):
+    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
         """
         Use Gauss-Hermite Quadrature to compute
@@ -158,9 +158,9 @@ class Likelihood(Parameterized):
         #evaluate the likelihood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
         # broadcast needs to be handled carefully.
-        logp = self.logpdf(X,Y[:,None])
-        dlogp_dx = self.dlogpdf_df(X, Y[:,None])
-        d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
+        logp = self.logpdf(X,Y[:,None], Y_metadata=Y_metadata)
+        dlogp_dx = self.dlogpdf_df(X, Y[:,None], Y_metadata=Y_metadata)
+        d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None], Y_metadata=Y_metadata)

         #clipping for numerical stability
         #logp = np.clip(logp,-1e9,1e9)
@@ -178,7 +178,7 @@ class Likelihood(Parameterized):
         stop
         dF_dtheta = None # Not yet implemented
-        return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), None
+        return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta

     def predictive_mean(self, mu, variance, Y_metadata=None):
         """

View file

@@ -64,8 +64,7 @@ class Poisson(Likelihood):
         :rtype: float
         """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
+        return -link_f + y*np.log(link_f) - special.gammaln(y+1)

     def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
@@ -83,7 +82,6 @@ class Poisson(Likelihood):
         :rtype: Nx1 array
         """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return y/link_f - 1

     def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
@@ -107,12 +105,7 @@ class Poisson(Likelihood):
         Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
         (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
         """
-        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        hess = -y/(link_f**2)
-        return hess
-        #d2_df = self.gp_link.d2transf_df2(gp)
-        #transf = self.gp_link.transf(gp)
-        #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
+        return -y/(link_f**2)

     def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """

View file

@@ -83,8 +83,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient

-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
-        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
+        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)

         if self.has_uncertain_inputs():
             current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
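The **kw addition is the usual pattern for threading caller-supplied keywords through an override to its parent unchanged; a generic sketch (the 'stochastics' keyword is purely illustrative, not GPy API):

    class Base(object):
        def _inner(self, x, **kwargs):
            return x, kwargs

    class Child(Base):
        def _inner(self, x, **kw):
            # forward whatever the caller supplied on to the parent
            return super(Child, self)._inner(x, **kw)

    assert Child()._inner(1, stochastics=True) == (1, {'stochastics': True})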

View file

@@ -3,6 +3,7 @@
 import numpy as np
 from ..core.parameterization.param import Param
+from ..core.sparse_gp import SparseGP
 from ..core.gp import GP
 from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
@@ -16,14 +17,9 @@ from GPy.inference.optimization.stochastics import SparseGPStochastics,\
 #SparseGPMissing
 logger = logging.getLogger("sparse gp")

-class SparseGPMiniBatch(GP):
+class SparseGPMiniBatch(SparseGP):
     """
-    A general purpose Sparse GP model
-
-    '''
-    Created on 3 Nov 2014
-
-    @author: maxz
-    '''
+    A general purpose Sparse GP model, allowing missing data and stochastics across dimensions.

     This model allows (approximate) inference using variational DTC or FITC
     (Gaussian likelihoods) as well as non-conjugate sparse methods based on
@@ -97,7 +93,7 @@
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)

-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs):
         """
         This is the standard part, which usually belongs in parameters_changed.
@@ -117,7 +113,7 @@
         algorithm.
         """
         try:
-            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None)
+            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
         except:
             posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
         current_values = {}
@@ -315,34 +311,3 @@
         else:
             self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
         self._outer_values_update(self.full_values)
-
-    def _raw_predict(self, Xnew, full_cov=False, kern=None):
-        """
-        Make a prediction for the latent function values
-        """
-        if kern is None: kern = self.kern
-
-        if not isinstance(Xnew, VariationalPosterior):
-            Kx = kern.K(self.Z, Xnew)
-            mu = np.dot(Kx.T, self.posterior.woodbury_vector)
-            if full_cov:
-                Kxx = kern.K(Xnew)
-                if self.posterior.woodbury_inv.ndim == 2:
-                    var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
-                elif self.posterior.woodbury_inv.ndim == 3:
-                    var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
-            else:
-                Kxx = kern.Kdiag(Xnew)
-                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
-        else:
-            Kx = kern.psi1(self.Z, Xnew)
-            mu = np.dot(Kx, self.posterior.woodbury_vector)
-            if full_cov:
-                raise NotImplementedError, "TODO"
-            else:
-                Kxx = kern.psi0(self.Z, Xnew)
-                psi2 = kern.psi2(self.Z, Xnew)
-                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
-        return mu, var
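The deleted _raw_predict is now inherited from SparseGP; the algebra it implemented for the 2D case, as a shape-level sketch with random stand-in matrices (none of the names below are GPy API):

    import numpy as np

    M, Nstar = 5, 3
    Kzx = np.random.randn(M, Nstar)              # stand-in for kern.K(Z, Xnew)
    woodbury_vector = np.random.randn(M, 1)
    W = np.random.randn(M, M)
    woodbury_inv = W.dot(W.T)                    # symmetric stand-in
    Kxx = np.eye(Nstar)                          # stand-in for kern.K(Xnew)

    mu = np.dot(Kzx.T, woodbury_vector)                      # (Nstar, 1)
    var = Kxx - np.dot(Kzx.T, np.dot(woodbury_inv, Kzx))     # (Nstar, Nstar)
    assert mu.shape == (Nstar, 1) and var.shape == (Nstar, Nstar)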

View file

@@ -172,7 +172,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         if hasattr(model,"Z"):
             #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
             Zu = Z[:,free_dims]
-            plots['inducing_inputs'] = ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
+            plots['inducing_inputs'] = ax.plot(Zu[:,0], Zu[:,1], 'wo')
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
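The one-line fix corrects a double-indexing slip: Zu has already been column-sliced by free_dims, so its two columns are 0 and 1; indexing it by free_dims again reads the wrong, or a non-existent, column. A minimal demonstration:

    import numpy as np
    Z = np.arange(12).reshape(4, 3)
    free_dims = [1, 2]
    Zu = Z[:, free_dims]                 # shape (4, 2)
    first, second = Zu[:, 0], Zu[:, 1]   # correct: the two retained columns
    # Zu[:, free_dims[1]] would ask for Zu's column 2, which does not exist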

View file

@@ -0,0 +1,37 @@
+import numpy as np
+import scipy as sp
+from ..util.linalg import jitchol
+
+class LinalgTests(np.testing.TestCase):
+    def setUp(self):
+        #Create a PD matrix
+        A = np.random.randn(20,100)
+        self.A = A.dot(A.T)
+        #compute Eigdecomp
+        vals, vectors = np.linalg.eig(self.A)
+        #Set the smallest eigenvalue negative, by 5 rounds' worth of jitter
+        vals[vals.argmin()] = 0
+        default_jitter = 1e-6*np.mean(vals)
+        vals[vals.argmin()] = -default_jitter*(10**3.5)
+        self.A_corrupt = (vectors * vals).dot(vectors.T)
+
+    def test_jitchol_success(self):
+        """
+        Expect 5 rounds of jitter to be added and for the recovered matrix to be
+        identical to the corrupted matrix apart from the jitter added to the diagonal
+        """
+        L = jitchol(self.A_corrupt, maxtries=5)
+        A_new = L.dot(L.T)
+        diff = A_new - self.A_corrupt
+        np.testing.assert_allclose(diff, np.eye(A_new.shape[0])*np.diag(diff).mean(), atol=1e-13)
+
+    def test_jitchol_failure(self):
+        """
+        Expect an exception to be raised, since enforcing PDness requires
+        5 rounds of jitter but only 4 are allowed
+        """
+        with self.assertRaises(sp.linalg.LinAlgError):
+            jitchol(self.A_corrupt, maxtries=4)
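The arithmetic behind the 5-rounds expectation: jitter starts at mean(diag(A))*1e-6, which equals default_jitter here because trace/n is the mean eigenvalue, and is multiplied by 10 on each failed round, so round k tries default_jitter * 10**(k-1). PDness needs the jitter to exceed |lambda_min| = default_jitter * 10**3.5, which is first true at k = 5:

    for k in range(1, 6):
        assert (10.0**(k-1) > 10.0**3.5) == (k == 5)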

View file

@@ -178,6 +178,24 @@ class MiscTests(unittest.TestCase):
         m.optimize()
         print m

+    def test_model_updates(self):
+        Y1 = np.random.normal(0, 1, (40, 13))
+        Y2 = np.random.normal(0, 1, (40, 6))
+        m = GPy.models.MRD([Y1, Y2], 5)
+        self.count = 0
+        m.add_observer(self, self._count_updates, -2000)
+        m.update_model(False)
+        m['.*Gaussian'] = .001
+        self.assertEquals(self.count, 0)
+        m['.*Gaussian'].constrain_bounded(0,.01)
+        self.assertEquals(self.count, 0)
+        m.Z.fix()
+        self.assertEquals(self.count, 0)
+        m.update_model(True)
+        self.assertEquals(self.count, 1)
+
+    def _count_updates(self, me, which):
+        self.count+=1
+
     def test_model_optimize(self):
         X = np.random.uniform(-3., 3., (20, 1))
         Y = np.sin(X) + np.random.randn(20, 1) * 0.05
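What the new test pins down, as a generic sketch of the deferred-notification behaviour (a toy observable, not GPy internals): with updates switched off, parameter changes mark the model dirty but observers fire only once, when updates are switched back on:

    class Observable(object):
        def __init__(self):
            self._observers, self._updates_on, self._dirty = [], True, False
        def add_observer(self, cb):
            self._observers.append(cb)
        def update_model(self, on):
            self._updates_on = on
            if on and self._dirty:
                self._notify()
        def set_param(self, *_):
            self._dirty = True
            if self._updates_on:
                self._notify()
        def _notify(self):
            self._dirty = False
            for cb in self._observers:
                cb(self)

    count = [0]
    m = Observable()
    m.add_observer(lambda me: count.__setitem__(0, count[0] + 1))
    m.update_model(False)
    m.set_param('noise', .001)
    m.set_param('Z', 'fix')
    assert count[0] == 0
    m.update_model(True)
    assert count[0] == 1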

View file

@@ -138,8 +138,6 @@ class Test(ListDictTestCase):
         self.assertIsNot(par.gradient_full, pcopy.gradient_full)
         self.assertTrue(pcopy.checkgrad())
         self.assert_(np.any(pcopy.gradient!=0.0))
-        pcopy.optimize('bfgs')
-        par.optimize('bfgs')
         np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=1e-6)
         par.randomize()
         with tempfile.TemporaryFile('w+b') as f:

View file

@@ -82,6 +82,7 @@ def force_F_ordered(A):
 #    return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
+
 def jitchol(A, maxtries=5):
     A = np.ascontiguousarray(A)
     L, info = lapack.dpotrf(A, lower=1)
@@ -92,25 +93,19 @@ def jitchol(A, maxtries=5):
         if np.any(diagA <= 0.):
             raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
         jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
+        num_tries = 1
+        while num_tries <= maxtries and np.isfinite(jitter):
             try:
                 L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
+                logging.warning('Added {} rounds of jitter, jitter of {:.10e}\n'.format(num_tries, jitter))
+                return L
             except:
                 jitter *= 10
-            finally:
-                maxtries -= 1
-        raise linalg.LinAlgError, "not positive definite, even with jitter."
+                num_tries += 1
         import traceback
-        try: raise
-        except:
-            logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
-                ' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
-            import ipdb;ipdb.set_trace()
-        return L
+        logging.warning('\n'.join(['Added {} rounds of jitter, jitter of {:.10e}'.format(num_tries-1, jitter),
+                                   ' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
+        raise linalg.LinAlgError, "not positive definite, even with jitter."

 # def dtrtri(L, lower=1):
 #     """