Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-04-30 15:26:23 +02:00)

Merge commit 65f408de9a
45 changed files with 1523 additions and 1347 deletions
@@ -381,6 +381,16 @@ class Model(Parameterized):
         self.optimizer_array = x
         return ret
 
+    def _repr_html_(self):
+        """Representation of the model in html for notebook display."""
+        model_details = [['<b>Model</b>', self.name + '<br>'],
+                         ['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
+                         ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)]]
+        from operator import itemgetter
+        to_print = [""] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["<br><b>Parameters</b>:"]
+        to_print.append(super(Model, self)._repr_html_())
+        return "\n".join(to_print)
+
     def __str__(self):
         model_details = [['Name', self.name],
                          ['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
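The added `_repr_html_` is the hook IPython/Jupyter looks for when it renders an object, so a model evaluated at the end of a notebook cell now shows up as an HTML summary instead of plain text. A minimal sketch of the mechanism with a stand-in class (`MiniModel` and its fields are illustrative, not GPy code):

    class MiniModel(object):
        def __init__(self, name, loglik, size):
            self.name, self.loglik, self.size = name, loglik, size

        def _repr_html_(self):
            # Jupyter calls this automatically when the object is displayed
            rows = [['Model', self.name],
                    ['Log-likelihood', '{}'.format(self.loglik)],
                    ['Number of Parameters', '{}'.format(self.size)]]
            cells = "".join("<tr><td><b>{}</b></td><td>{}</td></tr>".format(k, v) for k, v in rows)
            return "<table>" + cells + "</table>"

    # Outside a notebook the raw HTML string can still be inspected directly:
    print(MiniModel('gp regression', -12.3, 4)._repr_html_())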
@@ -53,7 +53,9 @@ class Param(Parameterizable, ObsAr):
         return obj
 
     def __init__(self, name, input_array, default_constraint=None, *a, **kw):
+        self._in_init_ = True
         super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
+        self._in_init_ = False
 
     def build_pydot(self,G):
         import pydot
@@ -249,6 +251,29 @@ class Param(Parameterizable, ObsAr):
         if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
         else: indstr = ','.join(map(str, ind))
         return name + '[' + indstr + ']'
+
+    def _repr_html_(self, constr_matrix=None, indices=None, prirs=None, ties=None):
+        """Representation of the parameter in html for notebook display."""
+        filter_ = self._current_slice_
+        vals = self.flat
+        if indices is None: indices = self._indices(filter_)
+        ravi = self._raveled_index(filter_)
+        if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
+        if prirs is None: prirs = self.priors.properties_for(ravi)
+        if ties is None: ties = self._ties_for(ravi)
+        ties = [' '.join(map(lambda x: x, t)) for t in ties]
+        header_format = """
+<tr>
+  <td><b>{i}</b></td>
+  <td><b>{x}</b></td>
+  <td><b>{c}</b></td>
+  <td><b>{p}</b></td>
+  <td><b>{t}</b></td>
+</tr>"""
+        header = header_format.format(x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
+        if not ties: ties = itertools.cycle([''])
+        return "\n".join(['<table>'] + [header] + ["<tr><td>{i}</td><td align=\"right\">{x}</td><td>{c}</td><td>{p}</td><td>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
+
     def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
         filter_ = self._current_slice_
         vals = self.flat
@@ -18,7 +18,7 @@ import numpy as np
 import re
 import logging
 
-__updated__ = '2014-09-22'
+__updated__ = '2014-10-22'
 
 class HierarchyError(Exception):
     """
@@ -99,7 +99,7 @@ class Observable(object):
 
         :param bool trigger_parent: Whether to trigger the parent, after self has updated
         """
-        if not self.update_model():
+        if not self.update_model() or self._in_init_:
             #print "Warning: updates are off, updating the model will do nothing"
             return
         self._trigger_params_changed(trigger_parent)
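The extra `or self._in_init_` works together with the flag set in `Param.__init__` above: while a parameter object is still being constructed, notifications are swallowed so the model is never asked to update against a half-built parameter. A standalone sketch of the guard pattern (class and attribute names other than `_in_init_` are illustrative, not GPy's):

    class Guarded(object):
        def __init__(self):
            object.__setattr__(self, '_in_init_', True)   # construction in progress
            self.value = 0.0                              # assignment is observed but ignored
            object.__setattr__(self, '_in_init_', False)

        def __setattr__(self, name, val):
            object.__setattr__(self, name, val)
            if getattr(self, '_in_init_', False):         # mirrors `... or self._in_init_: return`
                return
            print("recomputing model after change to {}".format(name))

    g = Guarded()    # silent: nothing is recomputed during __init__
    g.value = 1.0    # triggers the recomputation message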
@@ -784,7 +784,7 @@ class OptimizationHandlable(Indexable):
         constraint to it.
         """
         self._highest_parent_.tie.collate_gradient()
-        [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
+        [np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
         if self._has_fixes(): return g[self._fixes_]
         return g
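The rewritten line folds the chain rule into the transformation itself: for an optimizer-scale value x with constrained value f = f(x),

    \frac{\partial L}{\partial x} \;=\; \frac{\partial L}{\partial f}\,\left.\frac{\partial f(x)}{\partial x}\right|_{x = f^{-1}(f)},

so `gradfactor` now receives the incoming gradient dL/df (here `g[i]`) together with the parameter value and returns dL/dx directly, rather than returning the bare factor for the caller to multiply.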
@@ -357,6 +357,35 @@ class Parameterized(Parameterizable):
     @property
     def _ties_str(self):
         return [','.join(x._ties_str) for x in self.flattened_parameters]
+
+    def _repr_html_(self, header=True):
+        """Representation of the parameters in html for notebook display."""
+        name = adjust_name_for_printing(self.name) + "."
+        constrs = self._constraints_str;
+        ts = self._ties_str
+        prirs = self._priors_str
+        desc = self._description_str; names = self.parameter_names()
+        nl = max([len(str(x)) for x in names + [name]])
+        sl = max([len(str(x)) for x in desc + ["Value"]])
+        cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
+        tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
+        pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
+        format_spec = "<tr><td>{{name:<{0}s}}</td><td align=\"right\">{{desc:>{1}s}}</td><td>{{const:^{2}s}}</td><td>{{pri:^{3}s}}</td><td>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl)
+        to_print = []
+        for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
+            to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
+        sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
+        if header:
+            header = """
+<tr>
+  <td><b>{name}</b>
+  <td><b>Value</b></td>
+  <td><b>Constraint</b></td>
+  <td><b>Prior</b></td>
+  <td><b>Tied to</b></td>""".format(name=name)
+            to_print.insert(0, header)
+        return '<table>' + '\n'.format(sep).join(to_print) + '\n</table>'
+
     def __str__(self, header=True):
         name = adjust_name_for_printing(self.name) + "."
         constrs = self._constraints_str;
@@ -9,6 +9,7 @@ from domains import _REAL, _POSITIVE
 import warnings
 import weakref
 
+
 class Prior(object):
     domain = None
 
@@ -17,13 +18,16 @@ class Prior(object):
 
     def plot(self):
         import sys
+
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ...plotting.matplot_dep import priors_plots
+
         priors_plots.univariate_plot(self)
 
     def __repr__(self, *args, **kwargs):
         return self.__str__()
 
+
 class Gaussian(Prior):
     """
     Implementation of the univariate Gaussian probability function, coupled with random variables.
@@ -36,6 +40,7 @@ class Gaussian(Prior):
     """
     domain = _REAL
     _instances = []
+
     def __new__(cls, mu, sigma): # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
@@ -45,6 +50,7 @@ class Gaussian(Prior):
         o = super(Prior, cls).__new__(cls, mu, sigma)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, mu, sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
@@ -52,7 +58,7 @@ class Gaussian(Prior):
         self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
 
     def __str__(self):
-        return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
+        return "N({:.2g}, {:.2g})".format(self.mu, self.sigma)
 
     def lnpdf(self, x):
         return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
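The reworked `__str__` switches from `np.round`, which truncates to whole numbers (and, for the Gaussian, printed the variance), to two-significant-digit formatting of the stored mean and standard deviation. A quick standalone comparison of the two styles:

    import numpy as np

    mu, sigma = 0.05, 2.5
    old = "N(" + str(np.round(mu)) + ', ' + str(np.round(sigma**2)) + ')'
    new = "N({:.2g}, {:.2g})".format(mu, sigma)
    print(old)  # N(0.0, 6.0)  -- small values round away entirely
    print(new)  # N(0.05, 2.5) -- two significant digits of mu and sigma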
@@ -67,6 +73,7 @@ class Gaussian(Prior):
 class Uniform(Prior):
     domain = _REAL
     _instances = []
+
     def __new__(cls, lower, upper): # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
@@ -82,10 +89,10 @@ class Uniform(Prior):
         self.upper = float(upper)
 
     def __str__(self):
-        return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
+        return "[{:.2g}, {:.2g}]".format(self.lower, self.upper)
 
     def lnpdf(self, x):
-        region = (x>=self.lower) * (x<=self.upper)
+        region = (x >= self.lower) * (x <= self.upper)
         return region
 
     def lnpdf_grad(self, x):
@@ -94,6 +101,7 @@ class Uniform(Prior):
     def rvs(self, n):
         return np.random.uniform(self.lower, self.upper, size=n)
 
+
 class LogGaussian(Prior):
     """
     Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
@@ -106,6 +114,7 @@ class LogGaussian(Prior):
     """
     domain = _POSITIVE
     _instances = []
+
     def __new__(cls, mu, sigma): # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
@@ -115,6 +124,7 @@ class LogGaussian(Prior):
         o = super(Prior, cls).__new__(cls, mu, sigma)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, mu, sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
@@ -122,7 +132,7 @@ class LogGaussian(Prior):
         self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
 
     def __str__(self):
-        return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
+        return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma)
 
     def lnpdf(self, x):
         return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
@@ -146,6 +156,7 @@ class MultivariateGaussian:
     """
     domain = _REAL
     _instances = []
+
     def __new__(cls, mu, var): # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
@@ -155,6 +166,7 @@ class MultivariateGaussian:
         o = super(Prior, cls).__new__(cls, mu, var)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, mu, var):
         self.mu = np.array(mu).flatten()
         self.var = np.array(var)
@@ -184,10 +196,13 @@ class MultivariateGaussian:
 
     def plot(self):
         import sys
+
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import priors_plots
+
         priors_plots.multivariate_plot(self)
 
+
 def gamma_from_EV(E, V):
     warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
     return Gamma.from_EV(E, V)
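`Gamma.from_EV`, which the deprecated `gamma_from_EV` now forwards to, builds a Gamma prior from a target mean E and variance V: with shape a and rate b the moments are E = a/b and V = a/b², so a = E²/V and b = E/V. A standalone sketch of that conversion (the helper name is mine, not the GPy method itself):

    def gamma_shape_rate_from_EV(E, V):
        # mean of Gamma(a, b) is a/b, variance is a/b**2
        a = E ** 2 / V
        b = E / V
        return a, b

    a, b = gamma_shape_rate_from_EV(1.0, 0.5)     # -> (2.0, 2.0)
    assert abs(a / b - 1.0) < 1e-12 and abs(a / b ** 2 - 0.5) < 1e-12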
@@ -205,6 +220,7 @@ class Gamma(Prior):
     """
     domain = _POSITIVE
     _instances = []
+
     def __new__(cls, a, b): # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
@@ -214,13 +230,14 @@ class Gamma(Prior):
         o = super(Prior, cls).__new__(cls, a, b)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, a, b):
         self.a = float(a)
         self.b = float(b)
         self.constant = -gammaln(self.a) + a * np.log(b)
 
     def __str__(self):
-        return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
+        return "Ga({:.2g}, {:.2g})".format(self.a, self.b)
 
     def summary(self):
         ret = {"E[x]": self.a / self.b, \
@@ -241,6 +258,7 @@ class Gamma(Prior):
 
     def rvs(self, n):
         return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
+
     @staticmethod
     def from_EV(E, V):
         """
@@ -275,13 +293,14 @@ class InverseGamma(Prior):
         o = super(Prior, cls).__new__(cls, a, b)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, a, b):
         self.a = float(a)
         self.b = float(b)
         self.constant = -gammaln(self.a) + a * np.log(b)
 
     def __str__(self):
-        return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
+        return "iGa({:.2g}, {:.2g})".format(self.a, self.b)
 
     def lnpdf(self, x):
         return self.constant - (self.a + 1) * np.log(x) - self.b / x
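In use, these priors are attached to parameters, and the compact `__str__` strings above are what appear in the model's parameter table. A hedged usage sketch (the regression model and data are illustrative; the prior classes and `set_prior` are the GPy API touched by this commit):

    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = np.sin(X * 6.) + np.random.randn(50, 1) * 0.1
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))

    m.rbf.lengthscale.set_prior(GPy.priors.Gamma.from_EV(1., 10.))   # shows up as Ga(..., ...)
    m.rbf.variance.set_prior(GPy.priors.LogGaussian(0., 1.))         # shows up as lnN(0, 1)
    print(m)   # the priors column uses the two-significant-digit __str__ forms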
@ -292,6 +311,349 @@ class InverseGamma(Prior):
|
||||||
def rvs(self, n):
|
def rvs(self, n):
|
||||||
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
|
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
|
||||||
|
|
||||||
|
class DGPLVM_KFDA(Prior):
|
||||||
|
"""
|
||||||
|
Implementation of the Discriminative Gaussian Process Latent Variable function using
|
||||||
|
Kernel Fisher Discriminant Analysis by Seung-Jean Kim for implementing Face paper
|
||||||
|
by Chaochao Lu.
|
||||||
|
|
||||||
|
:param lambdaa: constant
|
||||||
|
:param sigma2: constant
|
||||||
|
|
||||||
|
.. Note:: Surpassing Human-Level Face paper dgplvm implementation
|
||||||
|
|
||||||
|
"""
|
||||||
|
domain = _REAL
|
||||||
|
# _instances = []
|
||||||
|
# def __new__(cls, lambdaa, sigma2): # Singleton:
|
||||||
|
# if cls._instances:
|
||||||
|
# cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
# for instance in cls._instances:
|
||||||
|
# if instance().mu == mu and instance().sigma == sigma:
|
||||||
|
# return instance()
|
||||||
|
# o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||||
|
# cls._instances.append(weakref.ref(o))
|
||||||
|
# return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, lambdaa, sigma2, lbl, kern, x_shape):
|
||||||
|
"""A description for init"""
|
||||||
|
self.datanum = lbl.shape[0]
|
||||||
|
self.classnum = lbl.shape[1]
|
||||||
|
self.lambdaa = lambdaa
|
||||||
|
self.sigma2 = sigma2
|
||||||
|
self.lbl = lbl
|
||||||
|
self.kern = kern
|
||||||
|
lst_ni = self.compute_lst_ni()
|
||||||
|
self.a = self.compute_a(lst_ni)
|
||||||
|
self.A = self.compute_A(lst_ni)
|
||||||
|
self.x_shape = x_shape
|
||||||
|
|
||||||
|
def get_class_label(self, y):
|
||||||
|
for idx, v in enumerate(y):
|
||||||
|
if v == 1:
|
||||||
|
return idx
|
||||||
|
return -1
|
||||||
|
|
||||||
|
# This function assigns each data point to its own class
|
||||||
|
# and returns the dictionary which contains the class name and parameters.
|
||||||
|
def compute_cls(self, x):
|
||||||
|
cls = {}
|
||||||
|
# Appending each data point to its proper class
|
||||||
|
for j in xrange(self.datanum):
|
||||||
|
class_label = self.get_class_label(self.lbl[j])
|
||||||
|
if class_label not in cls:
|
||||||
|
cls[class_label] = []
|
||||||
|
cls[class_label].append(x[j])
|
||||||
|
if len(cls) > 2:
|
||||||
|
for i in range(2, self.classnum):
|
||||||
|
del cls[i]
|
||||||
|
return cls
|
||||||
|
|
||||||
|
def x_reduced(self, cls):
|
||||||
|
x1 = cls[0]
|
||||||
|
x2 = cls[1]
|
||||||
|
x = np.concatenate((x1, x2), axis=0)
|
||||||
|
return x
|
||||||
|
|
||||||
|
def compute_lst_ni(self):
|
||||||
|
lst_ni = []
|
||||||
|
lst_ni1 = []
|
||||||
|
lst_ni2 = []
|
||||||
|
f1 = (np.where(self.lbl[:, 0] == 1)[0])
|
||||||
|
f2 = (np.where(self.lbl[:, 1] == 1)[0])
|
||||||
|
for idx in f1:
|
||||||
|
lst_ni1.append(idx)
|
||||||
|
for idx in f2:
|
||||||
|
lst_ni2.append(idx)
|
||||||
|
lst_ni.append(len(lst_ni1))
|
||||||
|
lst_ni.append(len(lst_ni2))
|
||||||
|
return lst_ni
|
||||||
|
|
||||||
|
def compute_a(self, lst_ni):
|
||||||
|
a = np.ones((self.datanum, 1))
|
||||||
|
count = 0
|
||||||
|
for N_i in lst_ni:
|
||||||
|
if N_i == lst_ni[0]:
|
||||||
|
a[count:count + N_i] = (float(1) / N_i) * a[count]
|
||||||
|
count += N_i
|
||||||
|
else:
|
||||||
|
if N_i == lst_ni[1]:
|
||||||
|
a[count: count + N_i] = -(float(1) / N_i) * a[count]
|
||||||
|
count += N_i
|
||||||
|
return a
|
||||||
|
|
||||||
|
def compute_A(self, lst_ni):
|
||||||
|
A = np.zeros((self.datanum, self.datanum))
|
||||||
|
idx = 0
|
||||||
|
for N_i in lst_ni:
|
||||||
|
B = float(1) / np.sqrt(N_i) * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i))))
|
||||||
|
A[idx:idx + N_i, idx:idx + N_i] = B
|
||||||
|
idx += N_i
|
||||||
|
return A
|
||||||
|
|
||||||
|
# Here log function
|
||||||
|
def lnpdf(self, x):
|
||||||
|
x = x.reshape(self.x_shape)
|
||||||
|
K = self.kern.K(x)
|
||||||
|
a_trans = np.transpose(self.a)
|
||||||
|
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
|
||||||
|
inv_part = pdinv(paran)[0]
|
||||||
|
J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot(self.A).dot(K).dot(self.a)
|
||||||
|
J_star = (1. / self.lambdaa) * J
|
||||||
|
return (-1. / self.sigma2) * J_star
|
||||||
|
|
||||||
|
# Here gradient function
|
||||||
|
def lnpdf_grad(self, x):
|
||||||
|
x = x.reshape(self.x_shape)
|
||||||
|
K = self.kern.K(x)
|
||||||
|
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
|
||||||
|
inv_part = pdinv(paran)[0]
|
||||||
|
b = self.A.dot(inv_part).dot(self.A).dot(K).dot(self.a)
|
||||||
|
a_Minus_b = self.a - b
|
||||||
|
a_b_trans = np.transpose(a_Minus_b)
|
||||||
|
DJ_star_DK = (1. / self.lambdaa) * (a_Minus_b.dot(a_b_trans))
|
||||||
|
DJ_star_DX = self.kern.gradients_X(DJ_star_DK, x)
|
||||||
|
return (-1. / self.sigma2) * DJ_star_DX
|
||||||
|
|
||||||
|
def rvs(self, n):
|
||||||
|
return np.random.rand(n) # A WRONG implementation
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'DGPLVM_prior'
|
||||||
|
|
||||||
|
|
||||||
|
class DGPLVM(Prior):
|
||||||
|
"""
|
||||||
|
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
|
||||||
|
|
||||||
|
:param sigma2: constant
|
||||||
|
|
||||||
|
.. Note:: DGPLVM for Classification paper implementation
|
||||||
|
|
||||||
|
"""
|
||||||
|
domain = _REAL
|
||||||
|
# _instances = []
|
||||||
|
# def __new__(cls, mu, sigma): # Singleton:
|
||||||
|
# if cls._instances:
|
||||||
|
# cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
# for instance in cls._instances:
|
||||||
|
# if instance().mu == mu and instance().sigma == sigma:
|
||||||
|
# return instance()
|
||||||
|
# o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||||
|
# cls._instances.append(weakref.ref(o))
|
||||||
|
# return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, sigma2, lbl, x_shape):
|
||||||
|
self.sigma2 = sigma2
|
||||||
|
# self.x = x
|
||||||
|
self.lbl = lbl
|
||||||
|
self.classnum = lbl.shape[1]
|
||||||
|
self.datanum = lbl.shape[0]
|
||||||
|
self.x_shape = x_shape
|
||||||
|
self.dim = x_shape[1]
|
||||||
|
|
||||||
|
def get_class_label(self, y):
|
||||||
|
for idx, v in enumerate(y):
|
||||||
|
if v == 1:
|
||||||
|
return idx
|
||||||
|
return -1
|
||||||
|
|
||||||
|
# This function assigns each data point to its own class
|
||||||
|
# and returns the dictionary which contains the class name and parameters.
|
||||||
|
def compute_cls(self, x):
|
||||||
|
cls = {}
|
||||||
|
# Appending each data point to its proper class
|
||||||
|
for j in xrange(self.datanum):
|
||||||
|
class_label = self.get_class_label(self.lbl[j])
|
||||||
|
if class_label not in cls:
|
||||||
|
cls[class_label] = []
|
||||||
|
cls[class_label].append(x[j])
|
||||||
|
return cls
|
||||||
|
|
||||||
|
# This function computes mean of each class. The mean is calculated through each dimension
|
||||||
|
def compute_Mi(self, cls):
|
||||||
|
M_i = np.zeros((self.classnum, self.dim))
|
||||||
|
for i in cls:
|
||||||
|
# Mean of each class
|
||||||
|
M_i[i] = np.mean(cls[i], axis=0)
|
||||||
|
return M_i
|
||||||
|
|
||||||
|
# Adding data points as tuple to the dictionary so that we can access indices
|
||||||
|
def compute_indices(self, x):
|
||||||
|
data_idx = {}
|
||||||
|
for j in xrange(self.datanum):
|
||||||
|
class_label = self.get_class_label(self.lbl[j])
|
||||||
|
if class_label not in data_idx:
|
||||||
|
data_idx[class_label] = []
|
||||||
|
t = (j, x[j])
|
||||||
|
data_idx[class_label].append(t)
|
||||||
|
return data_idx
|
||||||
|
|
||||||
|
# Adding indices to the list so we can access whole the indices
|
||||||
|
def compute_listIndices(self, data_idx):
|
||||||
|
lst_idx = []
|
||||||
|
lst_idx_all = []
|
||||||
|
for i in data_idx:
|
||||||
|
if len(lst_idx) == 0:
|
||||||
|
pass
|
||||||
|
#Do nothing, because it is the first time list is created so is empty
|
||||||
|
else:
|
||||||
|
lst_idx = []
|
||||||
|
# Here we put indices of each class in to the list called lst_idx_all
|
||||||
|
for m in xrange(len(data_idx[i])):
|
||||||
|
lst_idx.append(data_idx[i][m][0])
|
||||||
|
lst_idx_all.append(lst_idx)
|
||||||
|
return lst_idx_all
|
||||||
|
|
||||||
|
# This function calculates between classes variances
|
||||||
|
def compute_Sb(self, cls, M_i, M_0):
|
||||||
|
Sb = np.zeros((self.dim, self.dim))
|
||||||
|
for i in cls:
|
||||||
|
B = (M_i[i] - M_0).reshape(self.dim, 1)
|
||||||
|
B_trans = B.transpose()
|
||||||
|
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
|
||||||
|
return Sb
|
||||||
|
|
||||||
|
# This function calculates within classes variances
|
||||||
|
def compute_Sw(self, cls, M_i):
|
||||||
|
Sw = np.zeros((self.dim, self.dim))
|
||||||
|
for i in cls:
|
||||||
|
N_i = float(len(cls[i]))
|
||||||
|
W_WT = np.zeros((self.dim, self.dim))
|
||||||
|
for xk in cls[i]:
|
||||||
|
W = (xk - M_i[i])
|
||||||
|
W_WT += np.outer(W, W)
|
||||||
|
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
|
||||||
|
return Sw
|
||||||
|
|
||||||
|
# Calculating beta and Bi for Sb
|
||||||
|
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
|
||||||
|
# import pdb
|
||||||
|
# pdb.set_trace()
|
||||||
|
B_i = np.zeros((self.classnum, self.dim))
|
||||||
|
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
|
||||||
|
for i in data_idx:
|
||||||
|
# pdb.set_trace()
|
||||||
|
# Calculating Bi
|
||||||
|
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
|
||||||
|
for k in xrange(self.datanum):
|
||||||
|
for i in data_idx:
|
||||||
|
N_i = float(len(data_idx[i]))
|
||||||
|
if k in lst_idx_all[i]:
|
||||||
|
beta = (float(1) / N_i) - (float(1) / self.datanum)
|
||||||
|
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||||
|
else:
|
||||||
|
beta = -(float(1) / self.datanum)
|
||||||
|
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||||
|
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
|
||||||
|
return Sig_beta_B_i_all
|
||||||
|
|
||||||
|
|
||||||
|
# Calculating W_j s separately so we can access all the W_j s anytime
|
||||||
|
def compute_wj(self, data_idx, M_i):
|
||||||
|
W_i = np.zeros((self.datanum, self.dim))
|
||||||
|
for i in data_idx:
|
||||||
|
N_i = float(len(data_idx[i]))
|
||||||
|
for tpl in data_idx[i]:
|
||||||
|
xj = tpl[1]
|
||||||
|
j = tpl[0]
|
||||||
|
W_i[j] = (xj - M_i[i])
|
||||||
|
return W_i
|
||||||
|
|
||||||
|
# Calculating alpha and Wj for Sw
|
||||||
|
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
|
||||||
|
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
|
||||||
|
for i in data_idx:
|
||||||
|
N_i = float(len(data_idx[i]))
|
||||||
|
for tpl in data_idx[i]:
|
||||||
|
k = tpl[0]
|
||||||
|
for j in lst_idx_all[i]:
|
||||||
|
if k == j:
|
||||||
|
alpha = 1 - (float(1) / N_i)
|
||||||
|
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||||
|
else:
|
||||||
|
alpha = 0 - (float(1) / N_i)
|
||||||
|
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||||
|
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
|
||||||
|
return Sig_alpha_W_i
|
||||||
|
|
||||||
|
# This function calculates log of our prior
|
||||||
|
def lnpdf(self, x):
|
||||||
|
x = x.reshape(self.x_shape)
|
||||||
|
cls = self.compute_cls(x)
|
||||||
|
M_0 = np.mean(x, axis=0)
|
||||||
|
M_i = self.compute_Mi(cls)
|
||||||
|
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||||
|
Sw = self.compute_Sw(cls, M_i)
|
||||||
|
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||||
|
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||||
|
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||||
|
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||||
|
|
||||||
|
# This function calculates derivative of the log of prior function
|
||||||
|
def lnpdf_grad(self, x):
|
||||||
|
x = x.reshape(self.x_shape)
|
||||||
|
cls = self.compute_cls(x)
|
||||||
|
M_0 = np.mean(x, axis=0)
|
||||||
|
M_i = self.compute_Mi(cls)
|
||||||
|
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||||
|
Sw = self.compute_Sw(cls, M_i)
|
||||||
|
data_idx = self.compute_indices(x)
|
||||||
|
lst_idx_all = self.compute_listIndices(data_idx)
|
||||||
|
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
|
||||||
|
W_i = self.compute_wj(data_idx, M_i)
|
||||||
|
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
|
||||||
|
|
||||||
|
# Calculating inverse of Sb and its transpose and minus
|
||||||
|
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||||
|
# Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||||
|
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||||
|
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||||
|
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||||
|
Sw_trans = np.transpose(Sw)
|
||||||
|
|
||||||
|
# Calculating DJ/DXk
|
||||||
|
DJ_Dxk = 2 * (
|
||||||
|
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
|
||||||
|
Sig_alpha_W_i))
|
||||||
|
# Calculating derivative of the log of the prior
|
||||||
|
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
|
||||||
|
return DPx_Dx.T
|
||||||
|
|
||||||
|
# def frb(self, x):
|
||||||
|
# from functools import partial
|
||||||
|
# from GPy.models import GradientChecker
|
||||||
|
# f = partial(self.lnpdf)
|
||||||
|
# df = partial(self.lnpdf_grad)
|
||||||
|
# grad = GradientChecker(f, df, x, 'X')
|
||||||
|
# grad.checkgrad(verbose=1)
|
||||||
|
|
||||||
|
def rvs(self, n):
|
||||||
|
return np.random.rand(n) # A WRONG implementation
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'DGPLVM_prior'
|
||||||
|
|
||||||
class HalfT(Prior):
|
class HalfT(Prior):
|
||||||
"""
|
"""
|
||||||
Implementation of the half student t probability function, coupled with random variables.
|
Implementation of the half student t probability function, coupled with random variables.
|
||||||
|
|
@@ -317,7 +679,7 @@ class HalfT(Prior):
         self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu)
 
     def __str__(self):
-        return "hT(" + str(np.round(self.A)) + ', ' + str(np.round(self.nu)) + ')'
+        return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
 
     def lnpdf(self,theta):
         return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
@@ -27,12 +27,20 @@ class Transformation(object):
         if not cls._instance or cls._instance.__class__ is not cls:
             cls._instance = super(Transformation, cls).__new__(cls, *args, **kwargs)
         return cls._instance
-    def f(self, x):
+    def f(self, opt_param):
         raise NotImplementedError
-    def finv(self, x):
+    def finv(self, model_param):
         raise NotImplementedError
-    def gradfactor(self, f):
-        """ df_dx evaluated at self.f(x)=f"""
+    def gradfactor(self, model_param, dL_dmodel_param):
+        """ df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
+
+        i.e.:
+        define
+
+        .. math::
+
+            \frac{\partial L}{\partial f}\,\left.\frac{\partial f(x)}{\partial x}\right|_{x=f^{-1}(f)}
+        """
         raise NotImplementedError
     def initialize(self, f):
         """ produce a sensible initial value for f(x)"""
@@ -58,8 +66,8 @@ class Logexp(Transformation):
         #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
     def finv(self, f):
         return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
-    def gradfactor(self, f):
-        return np.where(f>_lim_val, 1., 1. - np.exp(-f))
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
    def initialize(self, f):
        if np.any(f < 0.):
            print "Warning: changing parameters to satisfy constraints"
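For the `Logexp` (softplus) transform, f = log(1 + exp(x)) and df/dx expressed through f is 1 - exp(-f); the new signature simply multiplies that factor onto the incoming gradient with `einsum`. A standalone NumPy check of the equivalence between the old and new conventions:

    import numpy as np

    f = np.array([0.5, 2.0, 3.0])          # constrained (positive) parameter values
    dL_df = np.array([0.1, -0.2, 0.05])    # gradient w.r.t. the constrained values

    # old convention: the caller multiplied by the returned factor
    dL_dx_old = dL_df * (1. - np.exp(-f))

    # new convention: the transformation consumes dL/df and returns dL/dx
    def gradfactor(f, df):
        return np.einsum('i,i->i', df, 1. - np.exp(-f))

    assert np.allclose(dL_dx_old, gradfactor(f, dL_df))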
@ -68,6 +76,209 @@ class Logexp(Transformation):
|
||||||
return '+ve'
|
return '+ve'
|
||||||
|
|
||||||
|
|
||||||
|
class NormalTheta(Transformation):
|
||||||
|
_instances = []
|
||||||
|
def __new__(cls, mu_indices, var_indices):
|
||||||
|
if cls._instances:
|
||||||
|
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
for instance in cls._instances:
|
||||||
|
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
|
||||||
|
return instance()
|
||||||
|
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
|
||||||
|
cls._instances.append(weakref.ref(o))
|
||||||
|
return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, mu_indices, var_indices):
|
||||||
|
self.mu_indices = mu_indices
|
||||||
|
self.var_indices = var_indices
|
||||||
|
|
||||||
|
def f(self, theta):
|
||||||
|
# In here abs is only a trick to make sure the numerics are ok.
|
||||||
|
# The variance will never go below zero, but at initialization we need to make sure
|
||||||
|
# that the values are ok
|
||||||
|
# Before:
|
||||||
|
theta[self.var_indices] = np.abs(-.5/theta[self.var_indices])
|
||||||
|
theta[self.mu_indices] *= theta[self.var_indices]
|
||||||
|
return theta # which is now {mu, var}
|
||||||
|
|
||||||
|
def finv(self, muvar):
|
||||||
|
# before:
|
||||||
|
varp = muvar[self.var_indices]
|
||||||
|
muvar[self.mu_indices] /= varp
|
||||||
|
muvar[self.var_indices] = -.5/varp
|
||||||
|
|
||||||
|
return muvar # which is now {theta1, theta2}
|
||||||
|
|
||||||
|
def gradfactor(self, muvar, dmuvar):
|
||||||
|
mu = muvar[self.mu_indices]
|
||||||
|
var = muvar[self.var_indices]
|
||||||
|
#=======================================================================
|
||||||
|
# theta gradients
|
||||||
|
# This works and the gradient checks!
|
||||||
|
dmuvar[self.mu_indices] *= var
|
||||||
|
dmuvar[self.var_indices] *= 2*(var)**2
|
||||||
|
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
|
||||||
|
#=======================================================================
|
||||||
|
|
||||||
|
return dmuvar # which is now the gradient multiplicator for {theta1, theta2}
|
||||||
|
|
||||||
|
def initialize(self, f):
|
||||||
|
if np.any(f[self.var_indices] < 0.):
|
||||||
|
print "Warning: changing parameters to satisfy constraints"
|
||||||
|
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||||
|
return f
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "theta"
|
||||||
|
|
||||||
|
class NormalNaturalAntti(NormalTheta):
|
||||||
|
_instances = []
|
||||||
|
_logexp = Logexp()
|
||||||
|
def __new__(cls, mu_indices, var_indices):
|
||||||
|
if cls._instances:
|
||||||
|
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
for instance in cls._instances:
|
||||||
|
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
|
||||||
|
return instance()
|
||||||
|
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
|
||||||
|
cls._instances.append(weakref.ref(o))
|
||||||
|
return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, mu_indices, var_indices):
|
||||||
|
self.mu_indices = mu_indices
|
||||||
|
self.var_indices = var_indices
|
||||||
|
|
||||||
|
def gradfactor(self, muvar, dmuvar):
|
||||||
|
mu = muvar[self.mu_indices]
|
||||||
|
var = muvar[self.var_indices]
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# theta gradients
|
||||||
|
# This works and the gradient checks!
|
||||||
|
dmuvar[self.mu_indices] *= var
|
||||||
|
dmuvar[self.var_indices] *= 2*var**2#np.einsum('i,i,i,i->i', dmuvar[self.var_indices], [2], var, var)
|
||||||
|
#=======================================================================
|
||||||
|
|
||||||
|
return dmuvar # which is now the gradient multiplicator
|
||||||
|
|
||||||
|
def initialize(self, f):
|
||||||
|
if np.any(f[self.var_indices] < 0.):
|
||||||
|
print "Warning: changing parameters to satisfy constraints"
|
||||||
|
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||||
|
return f
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "natantti"
|
||||||
|
|
||||||
|
class NormalEta(Transformation):
|
||||||
|
_instances = []
|
||||||
|
def __new__(cls, mu_indices, var_indices):
|
||||||
|
if cls._instances:
|
||||||
|
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
for instance in cls._instances:
|
||||||
|
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
|
||||||
|
return instance()
|
||||||
|
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
|
||||||
|
cls._instances.append(weakref.ref(o))
|
||||||
|
return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, mu_indices, var_indices):
|
||||||
|
self.mu_indices = mu_indices
|
||||||
|
self.var_indices = var_indices
|
||||||
|
|
||||||
|
def f(self, theta):
|
||||||
|
theta[self.var_indices] = np.abs(theta[self.var_indices] - theta[self.mu_indices]**2)
|
||||||
|
return theta # which is now {mu, var}
|
||||||
|
|
||||||
|
def finv(self, muvar):
|
||||||
|
muvar[self.var_indices] += muvar[self.mu_indices]**2
|
||||||
|
return muvar # which is now {eta1, eta2}
|
||||||
|
|
||||||
|
def gradfactor(self, muvar, dmuvar):
|
||||||
|
mu = muvar[self.mu_indices]
|
||||||
|
#=======================================================================
|
||||||
|
# Lets try natural gradients instead: Not working with bfgs... try stochastic!
|
||||||
|
dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
|
||||||
|
#=======================================================================
|
||||||
|
return dmuvar # which is now the gradient multiplicator
|
||||||
|
|
||||||
|
def initialize(self, f):
|
||||||
|
if np.any(f[self.var_indices] < 0.):
|
||||||
|
print "Warning: changing parameters to satisfy constraints"
|
||||||
|
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||||
|
return f
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "eta"
|
||||||
|
|
||||||
|
class NormalNaturalThroughTheta(NormalTheta):
|
||||||
|
_instances = []
|
||||||
|
def __new__(cls, mu_indices, var_indices):
|
||||||
|
if cls._instances:
|
||||||
|
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
for instance in cls._instances:
|
||||||
|
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
|
||||||
|
return instance()
|
||||||
|
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
|
||||||
|
cls._instances.append(weakref.ref(o))
|
||||||
|
return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, mu_indices, var_indices):
|
||||||
|
self.mu_indices = mu_indices
|
||||||
|
self.var_indices = var_indices
|
||||||
|
|
||||||
|
def gradfactor(self, muvar, dmuvar):
|
||||||
|
mu = muvar[self.mu_indices]
|
||||||
|
var = muvar[self.var_indices]
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# This is just eta direction:
|
||||||
|
dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
|
||||||
|
#=======================================================================
|
||||||
|
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# This is by going through theta fully and then going into eta direction:
|
||||||
|
#dmu = dmuvar[self.mu_indices]
|
||||||
|
#dmuvar[self.var_indices] += dmu*mu*(var + 4/var)
|
||||||
|
#=======================================================================
|
||||||
|
return dmuvar # which is now the gradient multiplicator
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "natgrad"
|
||||||
|
|
||||||
|
class NormalNaturalThroughEta(NormalEta):
|
||||||
|
_instances = []
|
||||||
|
def __new__(cls, mu_indices, var_indices):
|
||||||
|
if cls._instances:
|
||||||
|
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||||
|
for instance in cls._instances:
|
||||||
|
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
|
||||||
|
return instance()
|
||||||
|
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
|
||||||
|
cls._instances.append(weakref.ref(o))
|
||||||
|
return cls._instances[-1]()
|
||||||
|
|
||||||
|
def __init__(self, mu_indices, var_indices):
|
||||||
|
self.mu_indices = mu_indices
|
||||||
|
self.var_indices = var_indices
|
||||||
|
|
||||||
|
def gradfactor(self, muvar, dmuvar):
|
||||||
|
mu = muvar[self.mu_indices]
|
||||||
|
var = muvar[self.var_indices]
|
||||||
|
#=======================================================================
|
||||||
|
# theta gradients
|
||||||
|
# This works and the gradient checks!
|
||||||
|
dmuvar[self.mu_indices] *= var
|
||||||
|
dmuvar[self.var_indices] *= 2*(var)**2
|
||||||
|
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
|
||||||
|
#=======================================================================
|
||||||
|
return dmuvar
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "natgrad"
|
||||||
|
|
||||||
|
|
||||||
class LogexpNeg(Transformation):
|
class LogexpNeg(Transformation):
|
||||||
domain = _POSITIVE
|
domain = _POSITIVE
|
||||||
def f(self, x):
|
def f(self, x):
|
||||||
|
|
@@ -75,8 +286,8 @@ class LogexpNeg(Transformation):
         #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
     def finv(self, f):
         return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
-    def gradfactor(self, f):
-        return np.where(f>_lim_val, -1, -1 + np.exp(-f))
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
     def initialize(self, f):
         if np.any(f < 0.):
             print "Warning: changing parameters to satisfy constraints"
@@ -92,8 +303,8 @@ class NegativeLogexp(Transformation):
         return -self.logexp.f(x) # np.log(1. + np.exp(x))
     def finv(self, f):
         return self.logexp.finv(-f) # np.log(np.exp(-f) - 1.)
-    def gradfactor(self, f):
-        return -self.logexp.gradfactor(-f)
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, -self.logexp.gradfactor(-f))
     def initialize(self, f):
         return -self.logexp.initialize(f) # np.abs(f)
     def __str__(self):
@@ -125,10 +336,10 @@ class LogexpClipped(Logexp):
         return np.clip(f, self.min_bound, self.max_bound)
     def finv(self, f):
         return np.log(np.exp(f - 1.))
-    def gradfactor(self, f):
+    def gradfactor(self, f, df):
         ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
         gf = (ef - 1.) / ef
-        return gf # np.where(f < self.lower, 0, gf)
+        return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
     def initialize(self, f):
         if np.any(f < 0.):
             print "Warning: changing parameters to satisfy constraints"
@@ -143,8 +354,8 @@ class Exponent(Transformation):
         return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
     def finv(self, x):
         return np.log(x)
-    def gradfactor(self, f):
-        return f
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, f)
     def initialize(self, f):
         if np.any(f < 0.):
             print "Warning: changing parameters to satisfy constraints"
@@ -158,8 +369,8 @@ class NegativeExponent(Exponent):
         return -Exponent.f(x)
     def finv(self, f):
         return Exponent.finv(-f)
-    def gradfactor(self, f):
-        return f
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, f)
     def initialize(self, f):
         return -Exponent.initialize(f) #np.abs(f)
     def __str__(self):
@@ -171,8 +382,8 @@ class Square(Transformation):
         return x ** 2
     def finv(self, x):
         return np.sqrt(x)
-    def gradfactor(self, f):
-        return 2 * np.sqrt(f)
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, 2 * np.sqrt(f))
     def initialize(self, f):
         return np.abs(f)
     def __str__(self):
@@ -201,8 +412,8 @@ class Logistic(Transformation):
         return self.lower + self.difference / (1. + np.exp(-x))
     def finv(self, f):
         return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
-    def gradfactor(self, f):
-        return (f - self.lower) * (self.upper - f) / self.difference
+    def gradfactor(self, f, df):
+        return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
     def initialize(self, f):
         if np.any(np.logical_or(f < self.lower, f > self.upper)):
             print "Warning: changing parameters to satisfy constraints"
@@ -213,4 +424,3 @@ class Logistic(Transformation):
         return '{},{}'.format(self.lower, self.upper)
 
 
-
@@ -9,6 +9,11 @@ from .. import likelihoods
 from parameterization.variational import VariationalPosterior
 
 import logging
+from GPy.inference.latent_function_inference.posterior import Posterior
+from GPy.inference.optimization.stochastics import SparseGPStochastics,\
+    SparseGPMissing
+#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
+#SparseGPMissing
 logger = logging.getLogger("sparse gp")
 
 class SparseGP(GP):
@@ -34,12 +39,13 @@ class SparseGP(GP):
 
     """
 
-    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False):
+    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
+                 name='sparse gp', Y_metadata=None, normalizer=False,
+                 missing_data=False, stochastic=False, batchsize=1):
         #pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
-                inference_method = var_dtc.VarDTC()
+                inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
             else:
                 #inference_method = ??
                 raise NotImplementedError, "what to do what to do?"
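The widened constructor exposes the new behaviour through keyword flags. A hedged usage sketch (data shapes and kernel are illustrative; most users would normally reach this class through the higher-level model wrappers, and at this stage of the refactor the inference-method default was still in flux, so an explicit `inference_method` may be needed):

    import numpy as np
    import GPy

    X = np.random.rand(100, 1)
    Y = np.sin(X * 6.) + np.random.randn(100, 1) * 0.05
    Y[::7] = np.nan                       # some outputs missing
    Z = np.random.rand(10, 1)             # inducing inputs

    m = GPy.core.SparseGP(X, Y, Z,
                          kernel=GPy.kern.RBF(1),
                          likelihood=GPy.likelihoods.Gaussian(),
                          missing_data=True,     # handle the NaNs per output dimension
                          stochastic=False,      # set True (with batchsize) for stochastic sweeps
                          batchsize=1)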
@ -49,46 +55,284 @@ class SparseGP(GP):
|
||||||
self.num_inducing = Z.shape[0]
|
self.num_inducing = Z.shape[0]
|
||||||
|
|
||||||
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
|
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
|
||||||
|
self.missing_data = missing_data
|
||||||
|
|
||||||
|
if stochastic and missing_data:
|
||||||
|
self.missing_data = True
|
||||||
|
self.ninan = ~np.isnan(Y)
|
||||||
|
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||||
|
elif stochastic and not missing_data:
|
||||||
|
self.missing_data = False
|
||||||
|
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||||
|
elif missing_data:
|
||||||
|
self.missing_data = True
|
||||||
|
self.ninan = ~np.isnan(Y)
|
||||||
|
self.stochastics = SparseGPMissing(self)
|
||||||
|
else:
|
||||||
|
self.stochastics = False
|
||||||
|
|
||||||
logger.info("Adding Z as parameter")
|
logger.info("Adding Z as parameter")
|
||||||
self.link_parameter(self.Z, index=0)
|
self.link_parameter(self.Z, index=0)
|
||||||
|
if self.missing_data:
|
||||||
|
self.Ylist = []
|
||||||
|
overall = self.Y_normalized.shape[1]
|
||||||
|
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
|
||||||
|
message = m_f(-1)
|
||||||
|
print message,
|
||||||
|
for d in xrange(overall):
|
||||||
|
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
|
||||||
|
print ' '*(len(message)+1) + '\r',
|
||||||
|
message = m_f(d)
|
||||||
|
print message,
|
||||||
|
print ''
|
||||||
|
|
||||||
|
self.posterior = None
|
||||||
|
|
||||||
def has_uncertain_inputs(self):
|
def has_uncertain_inputs(self):
|
||||||
return isinstance(self.X, VariationalPosterior)
|
return isinstance(self.X, VariationalPosterior)
|
||||||
|
|
||||||
    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
        """
        This is the standard part, which usually belongs in parameters_changed.

        For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner
        loop, in order to ensure a different handling of gradients etc. of different
        subsets of data.

        The dict in current_values will be passed around as current_values for
        the rest of the algorithm, so this is the place to store current values,
        such as subsets etc., if necessary.

        If Lm and dL_dKmm can be precomputed (or only need to be computed once),
        pass them in here, so they will be passed on to the inference_method.

        subset_indices is a dictionary of indices. You can put the indices into
        this dictionary however you like, for inner use of the indices inside the
        algorithm.
        """
        try:
            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None)
        except:
            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
        current_values = {}
        likelihood.update_gradients(grad_dict['dL_dthetaL'])
        current_values['likgrad'] = likelihood.gradient.copy()
        if subset_indices is None:
            subset_indices = {}
        if isinstance(X, VariationalPosterior):
            #gradients wrt kernel
            dL_dKmm = grad_dict['dL_dKmm']
            kern.update_gradients_full(dL_dKmm, Z, None)
            current_values['kerngrad'] = kern.gradient.copy()
            kern.update_gradients_expectations(variational_posterior=X,
                                               Z=Z,
                                               dL_dpsi0=grad_dict['dL_dpsi0'],
                                               dL_dpsi1=grad_dict['dL_dpsi1'],
                                               dL_dpsi2=grad_dict['dL_dpsi2'])
            current_values['kerngrad'] += kern.gradient

            #gradients wrt Z
            current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
            current_values['Zgrad'] += kern.gradients_Z_expectations(
                               grad_dict['dL_dpsi0'],
                               grad_dict['dL_dpsi1'],
                               grad_dict['dL_dpsi2'],
                               Z=Z,
                               variational_posterior=X)
        else:
            #gradients wrt kernel
            kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
            current_values['kerngrad'] = kern.gradient.copy()
            kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
            current_values['kerngrad'] += kern.gradient
            kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
            current_values['kerngrad'] += kern.gradient
            #gradients wrt Z
            current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
            current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)

        return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
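The try/except around the inference call above exists because not every inference method accepts the precomputed Lm and dL_dKmm keywords. A minimal sketch of that fallback pattern follows; the `infer` callable and `call_inference` helper are hypothetical stand-ins for `self.inference_method.inference`, and the real method above uses a bare except rather than catching TypeError only.

def call_inference(infer, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None):
    try:
        # Preferred path: hand over precomputed quantities if the method supports them.
        return infer(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm)
    except TypeError:
        # Fallback: older inference signature without the extra keywords.
        return infer(kern, X, Z, likelihood, Y, Y_metadata)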
    def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
        """
        This is for automatic updates of values in the inner loop of missing
        data handling. Both arguments are dictionaries and the values in
        full_values will be updated by the current gradients.

        If a key from current_values does not exist in full_values, it will be
        initialized to the value in current_values.

        If indices are needed for the update, value_indices can be used for
        that. If value_indices has the same key as current_values, the update
        in full_values will be indexed by the indices in value_indices.

        grads:
            dictionary of standing gradients (you will have to carefully make sure that
            the ordering is right!). The values in here will be updated such that
            full_values[key] += current_values[key] for all keys in full_gradients.keys()

        gradients:
            dictionary of gradients in the current set of parameters.

        value_indices:
            dictionary holding indices for the update in full_values.
            If the key exists, the update rule is:
                full_values[key][value_indices[key]] += current_values[key]
        """
        for key in current_values.keys():
            if value_indices is not None and value_indices.has_key(key):
                index = value_indices[key]
            else:
                index = slice(None)
            if full_values.has_key(key):
                full_values[key][index] += current_values[key]
            else:
                full_values[key] = current_values[key]

# Stochastic-optimisation helper functions using climin, operating on a model instance m:
def df(x):
    m.stochastics.do_stochastics()
    grads = m._grads(x)
    print '\r',
    message = "Lik: {: 6.4E} Grad: {: 6.4E} Dim: {} Lik: {} Len: {!s}".format(float(m.log_likelihood()), np.einsum('i,i->', grads, grads), m.stochastics.d, float(m.likelihood.variance), " ".join(["{:3.2E}".format(l) for l in m.kern.lengthscale.values]))
    print message,
    return grads

def grad_stop(threshold):
    def inner(args):
        g = args['gradient']
        return np.sqrt(np.einsum('i,i->',g,g)) < threshold
    return inner

def maxiter_stop(maxiter):
    def inner(args):
        return args['n_iter'] == maxiter
    return inner

def optimize(m, maxiter=1000):
    #opt = climin.RmsProp(m.optimizer_array.copy(), df, 1e-6, decay=0.9, momentum=0.9, step_adapt=1e-7)
    opt = climin.Adadelta(m.optimizer_array.copy(), df, 1e-2, decay=0.9)
    ret = opt.minimize_until((grad_stop(.1), maxiter_stop(maxiter)))
    print
    return ret
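To see numerically what the accumulation above does, here is a tiny self-contained illustration in plain numpy; the dictionary keys 'kerngrad' and 'Xgrad' are only examples, not an exhaustive list of what the model stores.

import numpy as np

full_values = {'kerngrad': np.zeros(3)}                 # preallocated full-size gradient
value_indices = {'Xgrad': np.array([0, 2])}             # only these rows were visited

# gradients computed on the current subset of the data
current_values = {'kerngrad': np.array([1., 1., 1.]),
                  'Xgrad': np.array([0.5, 0.5])}

for key in current_values:
    index = value_indices.get(key, slice(None))
    if key in full_values:
        full_values[key][index] += current_values[key]  # accumulate into the full array
    else:
        full_values[key] = current_values[key]          # first time seen: take over as-is

# full_values['kerngrad'] -> [1. 1. 1.]; 'Xgrad' was created at subset size, which is
# why _outer_init_full_values below may need to preallocate full-size arrays for indexed keys.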
    def _inner_values_update(self, current_values):
        """
        This exists if there is more to do with the current values.
        It will always be called in the inner loop, so that
        you can do additional inner updates for the inside of the missing data
        loop etc. This can also be used for stochastic updates, when only working on
        one dimension of the output.
        """
        pass

    def _outer_values_update(self, full_values):
        """
        Here you put the values which were collected before into the right places.
        E.g. set the gradients of parameters, etc.
        """
        self.likelihood.gradient = full_values['likgrad']
        self.kern.gradient = full_values['kerngrad']
        self.Z.gradient = full_values['Zgrad']

    def _outer_init_full_values(self):
        """
        If full_values has indices in value_indices, we might want to initialize
        the full_values differently, so that subsetting is possible.

        Here you can initialize the full_values for the values needed.

        Keep in mind that if a key does not exist in full_values when updating
        values, it will be set (so e.g. for Z there is no need to initialize Zgrad,
        as there is no subsetting needed. For X in BGPLVM on the other hand we probably need
        to initialize the gradients for the mean and the variance in order to
        have the full gradient for indexing).
        """
        return {}
    def _outer_loop_for_missing_data(self):
        Lm = None
        dL_dKmm = None

        self._log_marginal_likelihood = 0
        full_values = self._outer_init_full_values()

        if self.posterior is None:
            woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
            woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
        else:
            woodbury_inv = self.posterior._woodbury_inv
            woodbury_vector = self.posterior._woodbury_vector

        if not self.stochastics:
            m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
            message = m_f(-1)
            print message,

        for d in self.stochastics.d:
            ninan = self.ninan[:, d]

            if not self.stochastics:
                print ' '*(len(message)) + '\r',
                message = m_f(d)
                print message,

            posterior, log_marginal_likelihood, \
                grad_dict, current_values, value_indices = self._inner_parameters_changed(
                    self.kern, self.X[ninan],
                    self.Z, self.likelihood,
                    self.Ylist[d], self.Y_metadata,
                    Lm, dL_dKmm,
                    subset_indices=dict(outputs=d, samples=ninan))

            self._inner_take_over_or_update(full_values, current_values, value_indices)
            self._inner_values_update(current_values)

            Lm = posterior.K_chol
            dL_dKmm = grad_dict['dL_dKmm']
            woodbury_inv[:, :, d] = posterior.woodbury_inv
            woodbury_vector[:, d:d+1] = posterior.woodbury_vector
            self._log_marginal_likelihood += log_marginal_likelihood

        if not self.stochastics:
            print ''

        if self.posterior is None:
            self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                                       K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
        self._outer_values_update(full_values)
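The per-dimension loop above relies on the `ninan` mask and the `Ylist` precomputed in __init__. A toy illustration of that preprocessing step (plain numpy, not GPy code) is:

import numpy as np

# Given Y with NaNs marking missing entries, build the boolean "not NaN" mask and
# one column vector of observed values per output dimension.
Y = np.array([[1., np.nan], [2., 3.], [np.nan, 4.]])
ninan = ~np.isnan(Y)                                        # mask of observed entries
Ylist = [Y[ninan[:, d], d][:, None] for d in range(Y.shape[1])]
# Ylist[0].shape == (2, 1) and Ylist[1].shape == (2, 1); dimension d is then handled
# with X[ninan[:, d]] and its own call to the inner inference, as in the loop above.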
    def _outer_loop_without_missing_data(self):
        self._log_marginal_likelihood = 0

        if self.posterior is None:
            woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
            woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
        else:
            woodbury_inv = self.posterior._woodbury_inv
            woodbury_vector = self.posterior._woodbury_vector

        d = self.stochastics.d
        posterior, log_marginal_likelihood, \
            grad_dict, current_values, _ = self._inner_parameters_changed(
                self.kern, self.X,
                self.Z, self.likelihood,
                self.Y_normalized[:, d], self.Y_metadata)
        self.grad_dict = grad_dict

        self._log_marginal_likelihood += log_marginal_likelihood

        self._outer_values_update(current_values)

        woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
        woodbury_vector[:, d] = posterior.woodbury_vector
        if self.posterior is None:
            self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                                       K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)

    def parameters_changed(self):
        if self.missing_data:
            self._outer_loop_for_missing_data()
        elif self.stochastics:
            self._outer_loop_without_missing_data()
        else:
            self.posterior, self._log_marginal_likelihood, self.grad_dict, full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
            self._outer_values_update(full_values)

    def _raw_predict(self, Xnew, full_cov=False, kern=None):
        """
@@ -34,7 +34,8 @@ class SparseGP_MPI(SparseGP):
    """

    def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False,
                 missing_data=False, stochastic=False, batchsize=1):
        self._IN_OPTIMIZATION_ = False
        if mpi_comm != None:
            if inference_method is None:
@@ -42,7 +43,8 @@ class SparseGP_MPI(SparseGP):
            else:
                assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'

        super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer,
                                           missing_data=missing_data, stochastic=stochastic, batchsize=batchsize)
        self.update_model(False)
        self.link_parameter(self.X, index=0)
        if variational_prior is not None:
@@ -12,7 +12,7 @@ from sympy.utilities.iterables import numbered_symbols
import scipy
import GPy
#from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
#@NDL you removed this file! #from GPy.util.symbolic import normcdf, normcdfln, logistic, logisticln, erfcx, erfc, gammaln

def getFromDict(dataDict, mapList):
    return reduce(lambda d, k: d[k], mapList, dataDict)
@@ -7,11 +7,6 @@ Gaussian Processes classification
"""
import GPy

default_seed = 10000

def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
@@ -19,7 +14,9 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
    Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.

    """
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.oil()
    X = data['X']
    Xtest = data['Xtest']
    Y = data['Y'][:, 0:1]
@@ -54,7 +51,9 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):

    """

    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

@@ -71,7 +70,8 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):

    # Plot
    if plot:
        from matplotlib import pyplot as plt
        fig, axes = plt.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

@@ -87,7 +87,9 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=

    """

    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

@@ -107,7 +109,8 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=

    # Plot
    if plot:
        from matplotlib import pyplot as plt
        fig, axes = plt.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

@@ -123,7 +126,9 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti

    """

    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

@@ -137,7 +142,8 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti

    # Plot
    if plot:
        from matplotlib import pyplot as plt
        fig, axes = plt.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

@@ -153,7 +159,9 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):

    """

    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

@@ -171,7 +179,8 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):

    # Plot
    if plot:
        from matplotlib import pyplot as plt
        fig, axes = plt.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

@@ -190,7 +199,9 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
    :param kernel: kernel to use in the model
    :type kernel: a GPy kernel
    """
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.crescent_data(seed=seed)
    Y = data['Y']
    Y[Y.flatten()==-1] = 0
@@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np

#default_seed = _np.random.seed(123344)

def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
@@ -68,7 +69,8 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan

def gplvm_oil_100(optimize=True, verbose=1, plot=True):
    import GPy
    import pods
    data = pods.datasets.oil_100()
    Y = data['X']
    # create simple GP model
    kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
@@ -80,8 +82,10 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):

def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50):
    import GPy
    import pods

    _np.random.seed(0)
    data = pods.datasets.oil()
    Y = data['X'][:N]
    Y = Y - Y.mean(0)
    Y /= Y.std(0)
@@ -98,7 +102,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci

def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2):
    import GPy
    from pods.datasets import swiss_roll_generated
    from GPy.models import BayesianGPLVM

    data = swiss_roll_generated(num_samples=N, sigma=sigma)
@@ -157,9 +161,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
    import GPy
    from matplotlib import pyplot as plt
    import numpy as np
    import pods

    _np.random.seed(0)
    data = pods.datasets.oil()

    kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
    Y = data['X'][:N]
@@ -182,9 +187,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
    import GPy
    from matplotlib import pyplot as plt
    import pods

    _np.random.seed(0)
    data = pods.datasets.oil()

    kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
    Y = data['X'][:N]
@@ -208,12 +214,12 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
    Q_signal = 4
    import GPy
    import numpy as np
    np.random.seed(3000)

    k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
    t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
    K = k.K(t)
    s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]

    Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)

@@ -360,23 +366,19 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                                   ):
    from GPy import kern
    from GPy.models import BayesianGPLVM

    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
    Y = Ylist[0]
    k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)

    inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 20% missing data
    Ymissing = Y.copy()
    Ymissing[inan] = _np.nan

    m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
                      kernel=k, missing_data=True)

    m.Yreal = Y

    if optimize:
@@ -445,8 +447,9 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim

def brendan_faces(optimize=True, verbose=True, plot=True):
    import GPy
    import pods

    data = pods.datasets.brendan_faces()
    Q = 2
    Y = data['Y']
    Yn = Y - Y.mean()
@@ -469,8 +472,9 @@ def brendan_faces(optimize=True, verbose=True, plot=True):

def olivetti_faces(optimize=True, verbose=True, plot=True):
    import GPy
    import pods

    data = pods.datasets.olivetti_faces()
    Q = 2
    Y = data['Y']
    Yn = Y - Y.mean()
@@ -490,7 +494,9 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):

def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True):
    import GPy
    import pods

    data = pods.datasets.osu_run1()
    # optimize
    if range == None:
        Y = data['Y'].copy()
@@ -505,8 +511,9 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru
def stick(kernel=None, optimize=True, verbose=True, plot=True):
    from matplotlib import pyplot as plt
    import GPy
    import pods

    data = pods.datasets.osu_run1()
    # optimize
    m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
    if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
@@ -524,8 +531,9 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
    from matplotlib import pyplot as plt
    import GPy
    import pods

    data = pods.datasets.osu_run1()
    # optimize
    mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
    m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
@@ -543,8 +551,9 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
    from matplotlib import pyplot as plt
    import GPy
    import pods

    data = pods.datasets.osu_run1()
    # optimize
    back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
    mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
@@ -563,8 +572,9 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
def robot_wireless(optimize=True, verbose=True, plot=True):
    from matplotlib import pyplot as plt
    import GPy
    import pods

    data = pods.datasets.robot_wireless()
    # optimize
    m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25)
    if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@@ -578,8 +588,9 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
    from matplotlib import pyplot as plt
    import numpy as np
    import GPy
    import pods

    data = pods.datasets.osu_run1()
    Q = 6
    kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
@@ -609,8 +620,9 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):

def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True):
    import GPy
    import pods

    data = pods.datasets.cmu_mocap(subject, motion)
    if in_place:
        # Make figure move in place.
        data['Y'][:, 0:3] = 0.0
@@ -13,7 +13,9 @@ import GPy

def olympic_marathon_men(optimize=True, plot=True):
    """Run a standard Gaussian process regression on the Olympic marathon data."""
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.olympic_marathon_men()

    # create simple GP Model
    m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -82,7 +84,9 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
    from the Mount Epomeo runs. Requires gpxpy to be installed on your system
    to load in the data.
    """
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.epomeo_gpx()
    num_data_list = []
    for Xpart in data['X']:
        num_data_list.append(Xpart.shape[0])
@@ -107,9 +111,9 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
    k = k1**k2

    m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
    m.constrain_fixed('.*variance', 1.)
    m.inducing_inputs.constrain_fixed()
    m.Gaussian_noise.variance.constrain_bounded(1e-3, 1e-1)
    m.optimize(max_iters=max_iters,messages=True)

    return m
@@ -125,7 +129,9 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
    length_scales = np.linspace(0.1, 60., resolution)
    log_SNRs = np.linspace(-3., 4., resolution)

    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
    # data['Y'] = data['Y'][0::2, :]
    # data['X'] = data['X'][0::2, :]

@@ -205,13 +211,15 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):

def olympic_100m_men(optimize=True, plot=True):
    """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.olympic_100m_men()

    # create simple GP Model
    m = GPy.models.GPRegression(data['X'], data['Y'])

    # set the lengthscale to be something sensible (defaults to 1)
    m.rbf.lengthscale = 10

    if optimize:
        m.optimize('bfgs', max_iters=200)
@@ -222,7 +230,9 @@ def olympic_100m_men(optimize=True, plot=True):

def toy_rbf_1d(optimize=True, plot=True):
    """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.toy_rbf_1d()

    # create simple GP Model
    m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -236,7 +246,9 @@ def toy_rbf_1d(optimize=True, plot=True):

def toy_rbf_1d_50(optimize=True, plot=True):
    """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.toy_rbf_1d_50()

    # create simple GP Model
    m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -303,12 +315,11 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
    # m.set_prior('.*lengthscale',len_prior)

    if optimize:
        m.optimize(optimizer='scg', max_iters=max_iters)

    if plot:
        m.kern.plot_ARD()

    return m

def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
@@ -343,24 +354,25 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
    # m.set_prior('.*lengthscale',len_prior)

    if optimize:
        m.optimize(optimizer='scg', max_iters=max_iters)

    if plot:
        m.kern.plot_ARD()

    return m

def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
    """Predict the location of a robot given wireless signal strength readings."""
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.robot_wireless()

    # create simple GP Model
    m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)

    # optimize
    if optimize:
        m.optimize(max_iters=max_iters)

    Xpredict = m.predict(data['Ytest'])[0]
    if plot:
@@ -372,13 +384,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):

    sse = ((data['Xtest'] - Xpredict)**2).sum()

    print('Sum of squares error on test data: ' + str(sse))
    return m

def silhouette(max_iters=100, optimize=True, plot=True):
    """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
    try:import pods
    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
    data = pods.datasets.silhouette()

    # create simple GP Model
    m = GPy.models.GPRegression(data['X'], data['Y'])
@@ -390,7 +403,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
    print m
    return m

def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
    """Run a 1D example of a sparse GP regression."""
    # sample inputs and outputs
    X = np.random.uniform(-3., 3., (num_samples, 1))
@@ -401,10 +414,10 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
    m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)

    if checkgrad:
        m.checkgrad()

    if optimize:
        m.optimize('tnc', max_iters=max_iters)

    if plot:
        m.plot()
@@ -21,6 +21,13 @@ class EPDTC(LatentFunctionInference):
        self.get_trYYT.limit = limit
        self.get_YYTfactor.limit = limit

    def on_optimization_start(self):
        self._ep_approximation = None

    def on_optimization_end(self):
        # TODO: update approximation in the end as well? Maybe even with a switch?
        pass

    def _get_trYYT(self, Y):
        return np.sum(np.square(Y))

@@ -82,7 +82,7 @@ class Laplace(LatentFunctionInference):

        #define the objective function (to be maximised)
        def obj(Ki_f, f):
            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))

        difference = np.inf
        iteration = 0
@@ -152,7 +152,7 @@ class Laplace(LatentFunctionInference):
        Ki_W_i = K - C.T.dot(C)

        #compute the log marginal
        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata).sum() - np.sum(np.log(np.diag(L)))

        # Compute matrices for derivatives
        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat

@@ -62,7 +62,9 @@ class VarDTC(LatentFunctionInference):
    def get_VVTfactor(self, Y, prec):
        return Y * prec # TODO cache this, and make it effective

    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):

        _, output_dim = Y.shape
        uncertain_inputs = isinstance(X, VariationalPosterior)
@@ -84,9 +86,9 @@ class VarDTC(LatentFunctionInference):

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        if Lm is None:
            Lm = jitchol(Kmm)

        # The rather complex computations of A, and the psi stats
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
@@ -100,7 +102,6 @@ class VarDTC(LatentFunctionInference):
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
        if het_noise:
            tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
        else:
@@ -122,6 +123,7 @@ class VarDTC(LatentFunctionInference):
        delit = tdot(_LBi_Lmi_psi1Vf)
        data_fit = np.trace(delit)
        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
        if dL_dKmm is None:
            delit = -0.5 * DBi_plus_BiPBi
            delit += -0.5 * B * output_dim
            delit += output_dim * np.eye(num_inducing)
|
||||||
inan = np.isnan(Y)
|
inan = np.isnan(Y)
|
||||||
has_none = inan.any()
|
has_none = inan.any()
|
||||||
self._inan = ~inan
|
self._inan = ~inan
|
||||||
|
inan = self._inan
|
||||||
else:
|
else:
|
||||||
inan = self._inan
|
inan = self._inan
|
||||||
has_none = True
|
has_none = True
|
||||||
|
|
@ -241,7 +244,7 @@ class VarDTCMissingData(LatentFunctionInference):
|
||||||
self._subarray_indices = [[slice(None),slice(None)]]
|
self._subarray_indices = [[slice(None),slice(None)]]
|
||||||
return [Y], [(Y**2).sum()]
|
return [Y], [(Y**2).sum()]
|
||||||
|
|
||||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None):
|
||||||
if isinstance(X, VariationalPosterior):
|
if isinstance(X, VariationalPosterior):
|
||||||
uncertain_inputs = True
|
uncertain_inputs = True
|
||||||
psi0_all = kern.psi0(Z, X)
|
psi0_all = kern.psi0(Z, X)
|
||||||
|
|
@ -260,7 +263,7 @@ class VarDTCMissingData(LatentFunctionInference):
|
||||||
dL_dpsi0_all = np.zeros(Y.shape[0])
|
dL_dpsi0_all = np.zeros(Y.shape[0])
|
||||||
dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
|
dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
dL_dpsi2_all = np.zeros((num_inducing, num_inducing))
|
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
|
||||||
|
|
||||||
dL_dR = 0
|
dL_dR = 0
|
||||||
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
|
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
|
||||||
|
|
@ -271,31 +274,31 @@ class VarDTCMissingData(LatentFunctionInference):
|
||||||
Kmm = kern.K(Z).copy()
|
Kmm = kern.K(Z).copy()
|
||||||
diag.add(Kmm, self.const_jitter)
|
diag.add(Kmm, self.const_jitter)
|
||||||
#factor Kmm
|
#factor Kmm
|
||||||
|
if Lm is None:
|
||||||
Lm = jitchol(Kmm)
|
Lm = jitchol(Kmm)
|
||||||
if uncertain_inputs: LmInv = dtrtri(Lm)
|
if uncertain_inputs: LmInv = dtrtri(Lm)
|
||||||
|
|
||||||
size = Y.shape[1]
|
size = len(Ys)
|
||||||
next_ten = 0
|
next_ten = 0
|
||||||
|
|
||||||
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
|
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
|
||||||
if ((i+1.)/size) >= next_ten:
|
if ((i+1.)/size) >= next_ten:
|
||||||
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
|
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
|
||||||
next_ten += .1
|
next_ten += .1
|
||||||
if het_noise: beta = beta_all[i]
|
if het_noise: beta = beta_all[i]
|
||||||
else: beta = beta_all
|
else: beta = beta_all
|
||||||
|
|
||||||
VVT_factor = (y*beta)
|
VVT_factor = (y*beta)
|
||||||
output_dim = 1#len(ind)
|
output_dim = 1#len(ind)
|
||||||
|
|
||||||
psi0 = psi0_all[v]
|
psi0 = psi0_all[v]
|
||||||
psi1 = psi1_all[v, :]
|
psi1 = psi1_all[v, :]
|
||||||
if uncertain_inputs: psi2 = kern.psi2(Z, X[v, :])
|
if uncertain_inputs:
|
||||||
|
psi2 = kern.psi2(Z, X[v, :])
|
||||||
else: psi2 = None
|
else: psi2 = None
|
||||||
num_data = psi1.shape[0]
|
num_data = psi1.shape[0]
|
||||||
|
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
|
if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
|
||||||
else: psi2_beta = psi2 * beta
|
else: psi2_beta = psi2 * (beta)
|
||||||
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
||||||
else:
|
else:
|
||||||
if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
|
if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
|
||||||
|
|
@ -330,11 +333,11 @@ class VarDTCMissingData(LatentFunctionInference):
|
||||||
dL_dpsi0_all[v] += dL_dpsi0
|
dL_dpsi0_all[v] += dL_dpsi0
|
||||||
dL_dpsi1_all[v, :] += dL_dpsi1
|
dL_dpsi1_all[v, :] += dL_dpsi1
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
dL_dpsi2_all += dL_dpsi2
|
dL_dpsi2_all[v, :, :] += dL_dpsi2
|
||||||
|
|
||||||
# log marginal likelihood
|
# log marginal likelihood
|
||||||
log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
|
log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
|
||||||
psi0, A, LB, trYYT, data_fit,VVT_factor)
|
psi0, A, LB, trYYT, data_fit, VVT_factor)
|
||||||
|
|
||||||
#put the gradients in the right places
|
#put the gradients in the right places
|
||||||
dL_dR += _compute_dL_dR(likelihood,
|
dL_dR += _compute_dL_dR(likelihood,
|
||||||
|
|
@ -393,7 +396,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
|
||||||
# subsume back into psi1 (==Kmn)
|
# subsume back into psi1 (==Kmn)
|
||||||
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
|
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
|
||||||
dL_dpsi2 = None
|
dL_dpsi2 = None
|
||||||
|
|
||||||
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
53
GPy/inference/optimization/stochastics.py
Normal file
53
GPy/inference/optimization/stochastics.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
'''
|
||||||
|
Created on 9 Oct 2014
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
|
||||||
|
class StochasticStorage(object):
|
||||||
|
'''
|
||||||
|
This is a container for holding the stochastic parameters,
|
||||||
|
such as subset indices or step length and so on.
|
||||||
|
'''
|
||||||
|
def __init__(self, model):
|
||||||
|
"""
|
||||||
|
Initialize this stochastic container using the given model
|
||||||
|
"""
|
||||||
|
|
||||||
|
def do_stochastics(self):
|
||||||
|
"""
|
||||||
|
Update the internal state to the next batch of the stochastic
|
||||||
|
descent algorithm.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
class SparseGPMissing(StochasticStorage):
|
||||||
|
def __init__(self, model, batchsize=1):
|
||||||
|
"""
|
||||||
|
Here we want to loop over all dimensions everytime.
|
||||||
|
Thus, we can just make sure the loop goes over self.d every
|
||||||
|
time.
|
||||||
|
"""
|
||||||
|
self.d = xrange(model.Y_normalized.shape[1])
|
||||||
|
|
||||||
|
class SparseGPStochastics(StochasticStorage):
|
||||||
|
"""
|
||||||
|
For the sparse gp we need to store the dimension we are in,
|
||||||
|
and the indices corresponding to those
|
||||||
|
"""
|
||||||
|
def __init__(self, model, batchsize=1):
|
||||||
|
import itertools
|
||||||
|
self.batchsize = batchsize
|
||||||
|
if self.batchsize == 1:
|
||||||
|
self.dimensions = itertools.cycle(range(model.Y_normalized.shape[1]))
|
||||||
|
else:
|
||||||
|
import numpy as np
|
||||||
|
self.dimensions = lambda: np.random.choice(model.Y_normalized.shape[1], size=batchsize, replace=False)
|
||||||
|
self.d = None
|
||||||
|
self.do_stochastics()
|
||||||
|
|
||||||
|
def do_stochastics(self):
|
||||||
|
if self.batchsize == 1:
|
||||||
|
self.d = [self.dimensions.next()]
|
||||||
|
else:
|
||||||
|
self.d = self.dimensions()
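A minimal standalone sketch of the batching behaviour implemented above; _FakeModel is a hypothetical stand-in exposing only the Y_normalized attribute the storage object reads, and the printed values are examples, not guaranteed output.

import numpy as np

class _FakeModel(object):
    # Hypothetical stand-in for a GPy model with 3 output dimensions.
    Y_normalized = np.zeros((10, 3))

store_cycle = SparseGPStochastics(_FakeModel(), batchsize=1)
print store_cycle.d          # e.g. [0]; each do_stochastics() call moves on to the next dimension
store_cycle.do_stochastics()
print store_cycle.d          # e.g. [1]

store_batch = SparseGPStochastics(_FakeModel(), batchsize=2)
print store_batch.d          # two distinct dimensions drawn at random without replacement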
@ -17,17 +17,3 @@ from _src.poly import Poly
|
||||||
from _src.trunclinear import TruncLinear,TruncLinear_inf
|
from _src.trunclinear import TruncLinear,TruncLinear_inf
|
||||||
from _src.splitKern import SplitKern,DiffGenomeKern
|
from _src.splitKern import SplitKern,DiffGenomeKern
|
||||||
|
|
||||||
# TODO: put this in an init file somewhere
|
|
||||||
#I'm commenting this out because the files were not added. JH. Remember to add the files before commiting
|
|
||||||
try:
|
|
||||||
import sympy as sym
|
|
||||||
sympy_available=True
|
|
||||||
except ImportError:
|
|
||||||
sympy_available=False
|
|
||||||
|
|
||||||
if sympy_available:
|
|
||||||
from _src.symbolic import Symbolic
|
|
||||||
from _src.eq import Eq
|
|
||||||
from _src.heat_eqinit import Heat_eqinit
|
|
||||||
#from _src.ode1_eq_lfm import Ode1_eq_lfm
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
try:
|
|
||||||
import sympy as sym
|
|
||||||
sympy_available=True
|
|
||||||
except ImportError:
|
|
||||||
sympy_available=False
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from symbolic import Symbolic
|
|
||||||
|
|
||||||
class Eq(Symbolic):
|
|
||||||
"""
|
|
||||||
The exponentiated quadratic covariance as a symbolic function.
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self, input_dim, output_dim=1, variance=1.0, lengthscale=1.0, name='Eq'):
|
|
||||||
|
|
||||||
parameters = {'variance' : variance, 'lengthscale' : lengthscale}
|
|
||||||
x = sym.symbols('x_:' + str(input_dim))
|
|
||||||
z = sym.symbols('z_:' + str(input_dim))
|
|
||||||
variance = sym.var('variance',positive=True)
|
|
||||||
lengthscale = sym.var('lengthscale', positive=True)
|
|
||||||
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
|
|
||||||
from sympy.parsing.sympy_parser import parse_expr
|
|
||||||
dist = parse_expr(dist_string)
|
|
||||||
|
|
||||||
# this is the covariance function
|
|
||||||
f = variance*sym.exp(-dist/(2*lengthscale**2))
|
|
||||||
# extra input dim is to signify the output dimension.
|
|
||||||
super(Eq, self).__init__(input_dim=input_dim, k=f, output_dim=output_dim, parameters=parameters, name=name)
|
|
||||||
|
|
||||||
|
|
@@ -1,30 +0,0 @@
-try:
-    import sympy as sym
-    sympy_available=True
-except ImportError:
-    sympy_available=False
-
-import numpy as np
-from symbolic import Symbolic
-
-class Heat_eqinit(Symbolic):
-    """
-    A symbolic covariance based on laying down an initial condition of the heat equation with an exponentiated quadratic covariance. The covariance then has multiple outputs which are interpreted as observations of the diffused process with different diffusion coefficients (or at different times).
-
-    """
-    def __init__(self, input_dim, output_dim=1, param=None, name='Heat_eqinit'):
-
-        x = sym.symbols('x_:' + str(input_dim))
-        z = sym.symbols('z_:' + str(input_dim))
-        scale = sym.var('scale_i scale_j',positive=True)
-        lengthscale = sym.var('lengthscale_i lengthscale_j', positive=True)
-        shared_lengthscale = sym.var('shared_lengthscale', positive=True)
-        dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
-        from sympy.parsing.sympy_parser import parse_expr
-        dist = parse_expr(dist_string)
-
-        # this is the covariance function
-        f = scale_i*scale_j*sym.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
-        # extra input dim is to signify the output dimension.
-        super(Heat_eqinit, self).__init__(input_dim=input_dim+1, k=f, output_dim=output_dim, name=name)
-
-
@@ -6,6 +6,7 @@ import numpy as np
 from ...core.parameterization.parameterized import Parameterized
 from kernel_slice_operations import KernCallsViaSlicerMeta
 from ...util.caching import Cache_this
+from GPy.core.parameterization.observable_array import ObsAr
 
 
 
@@ -48,12 +49,26 @@ class Kern(Parameterized):
         if active_dims is None:
             active_dims = np.arange(input_dim)
 
-        self.active_dims = np.array(active_dims, dtype=int)
+        self.active_dims = np.atleast_1d(active_dims).astype(int)
 
         assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
 
         self._sliced_X = 0
         self.useGPU = self._support_GPU and useGPU
+        self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
+
+    @property
+    def return_psi2_n(self):
+        """
+        Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
+        """
+        return self._return_psi2_n_flag[0]
+    @return_psi2_n.setter
+    def return_psi2_n(self, val):
+        def visit(self):
+            if isinstance(self, Kern):
+                self._return_psi2_n_flag[0]=val
+        self.traverse(visit)
 
     @Cache_this(limit=20)
     def _slice_X(self, X):
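The new setter pushes the flag down the whole kernel hierarchy via traverse. A rough sketch of that pattern with simplified stand-in classes (not the actual GPy hierarchy):

import numpy as np

class Node(object):
    """Minimal stand-in for a parameterized object with children."""
    def __init__(self, children=()):
        self.children = list(children)
        self.flag = np.zeros(1, dtype=bool)  # plays the role of the one-element ObsAr flag
    def traverse(self, visit):
        visit(self)
        for c in self.children:
            c.traverse(visit)

root = Node([Node(), Node([Node()])])

def visit(node):
    node.flag[0] = True  # set the flag on every node in the hierarchy

root.traverse(visit)
print(all(n.flag[0] for n in [root] + root.children))  # True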
@@ -10,7 +10,6 @@ import sslinear_psi_comp
 import linear_psi_comp
-
 
 class PSICOMP_RBF(Pickleable):
 
     @Cache_this(limit=2, ignore_args=(0,))
     def psicomputations(self, variance, lengthscale, Z, variational_posterior):
         if isinstance(variational_posterior, variational.NormalPosterior):
@@ -139,8 +139,7 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
     denom2 = np.square(denom)
 
     _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
-    Lpsi2 = dL_dpsi2[None,:,:]*_psi2
+    Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
     Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
     Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
     Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
 
@@ -8,7 +8,8 @@ from ...core.parameterization.transformations import Logexp
 from ...util.linalg import tdot
 from ... import util
 import numpy as np
-from scipy import integrate
+from scipy import integrate, weave
+from ...util.config import config # for assesing whether to use weave
 from ...util.caching import Cache_this
 
 class Stationary(Kern):
@@ -71,6 +72,13 @@ class Stationary(Kern):
 
     @Cache_this(limit=5, ignore_args=())
     def K(self, X, X2=None):
+        """
+        Kernel function applied on inputs X and X2.
+        In the stationary case there is an inner function depending on the
+        distances from X to X2, called r.
+
+        K(X, X2) = K_of_r((X-X2)**2)
+        """
         r = self._scaled_dist(X, X2)
         return self.K_of_r(r)
 
 
@@ -124,10 +132,22 @@ class Stationary(Kern):
         return ret
 
     def update_gradients_diag(self, dL_dKdiag, X):
+        """
+        Given the derivative of the objective with respect to the diagonal of
+        the covariance matrix, compute the derivative wrt the parameters of
+        this kernel and stor in the <parameter>.gradient field.
+
+        See also update_gradients_full
+        """
         self.variance.gradient = np.sum(dL_dKdiag)
         self.lengthscale.gradient = 0.
 
     def update_gradients_full(self, dL_dK, X, X2=None):
+        """
+        Given the derivative of the objective wrt the covariance matrix
+        (dL_dK), compute the gradient wrt the parameters of this kernel,
+        and store in the parameters object as e.g. self.variance.gradient
+        """
         self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
 
         #now the lengthscale gradient(s)
@@ -139,6 +159,16 @@ class Stationary(Kern):
             #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
             tmp = dL_dr*self._inv_dist(X, X2)
             if X2 is None: X2 = X
+
+            if config.getboolean('weave', 'working'):
+                try:
+                    self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
+                except:
+                    print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
+                    config.set('weave', 'working', 'False')
+                    self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
+            else:
+                self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
-            self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
         else:
             r = self._scaled_dist(X, X2)
@@ -154,10 +184,43 @@ class Stationary(Kern):
         dist = self._scaled_dist(X, X2).copy()
         return 1./np.where(dist != 0., dist, np.inf)
 
+    def weave_lengthscale_grads(self, tmp, X, X2):
+        """Use scipy.weave to compute derivatives wrt the lengthscales"""
+        N,M = tmp.shape
+        Q = X.shape[1]
+        if hasattr(X, 'values'):X = X.values
+        if hasattr(X2, 'values'):X2 = X2.values
+        grads = np.zeros(self.input_dim)
+        code = """
+        double gradq;
+        for(int q=0; q<Q; q++){
+          gradq = 0;
+          for(int n=0; n<N; n++){
+            for(int m=0; m<M; m++){
+              gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
+            }
+          }
+          grads[q] = gradq;
+        }
+        """
+        weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
+        return -grads/self.lengthscale**3
+
     def gradients_X(self, dL_dK, X, X2=None):
         """
         Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
         """
+        if config.getboolean('weave', 'working'):
+            try:
+                return self.gradients_X_weave(dL_dK, X, X2)
+            except:
+                print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
+                config.set('weave', 'working', 'False')
+                return self.gradients_X_(dL_dK, X, X2)
+        else:
+            return self.gradients_X_(dL_dK, X, X2)
+
+    def gradients_X_(self, dL_dK, X, X2=None):
         invdist = self._inv_dist(X, X2)
         dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
         tmp = invdist*dL_dr
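The inlined C code above accumulates tmp(n,m)*(X[n,q]-X2[m,q])**2 per input dimension and scales by -1/lengthscale**3. A small standalone numpy check (hypothetical shapes, not GPy code) showing the einsum form used in the fallback computes the same quantity as the explicit loops:

import numpy as np

N, M, Q = 5, 4, 3
rng = np.random.RandomState(0)
tmp = rng.randn(N, M)
X, X2 = rng.randn(N, Q), rng.randn(M, Q)
lengthscale = np.abs(rng.randn(Q)) + 1.

# explicit triple loop, mirroring the inlined C code
grads = np.zeros(Q)
for q in range(Q):
    for n in range(N):
        for m in range(M):
            grads[q] += tmp[n, m] * (X[n, q] - X2[m, q]) ** 2
loop_grad = -grads / lengthscale ** 3

# vectorised einsum used in the numpy fallback
einsum_grad = np.array([np.einsum('ij,ij,...', tmp,
                                  np.square(X[:, q:q+1] - X2[:, q:q+1].T),
                                  -1. / lengthscale[q] ** 3) for q in range(Q)])

print(np.allclose(loop_grad, einsum_grad))  # True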
@@ -177,6 +240,36 @@ class Stationary(Kern):
 
         return ret
 
+    def gradients_X_weave(self, dL_dK, X, X2=None):
+        invdist = self._inv_dist(X, X2)
+        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
+        tmp = invdist*dL_dr
+        if X2 is None:
+            tmp = tmp + tmp.T
+            X2 = X
+
+        code = """
+        int n,q,d;
+        double retnd;
+        for(n=0;n<N;n++){
+          for(d=0;d<D;d++){
+            retnd = 0;
+            for(q=0;q<Q;q++){
+              retnd += tmp(n,q)*(X(n,d)-X2(q,d));
+            }
+            ret(n,d) = retnd;
+          }
+        }
+        """
+        if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
+        if hasattr(X2, 'values'):X2 = X2.values
+        ret = np.zeros(X.shape)
+        N,D = X.shape
+        Q = tmp.shape[1]
+        from scipy import weave
+        weave.inline(code, ['ret', 'N', 'D', 'Q', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz)
+        return ret/self.lengthscale**2
+
     def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)
 
@@ -6,18 +6,3 @@ from poisson import Poisson
 from student_t import StudentT
 from likelihood import Likelihood
 from mixed_noise import MixedNoise
-#TODO need to fix this in a config file.
-#TODO need to add the files to the git repo!
-try:
-    import sympy as sym
-    sympy_available=True
-except ImportError:
-    sympy_available=False
-if sympy_available:
-    #These are likelihoods that rely on symbolic.
-    from symbolic import Symbolic
-    from sstudent_t import SstudentT
-    from negative_binomial import Negative_binomial
-    from skew_normal import Skew_normal
-    from skew_exponential import Skew_exponential
-    # from null_category import Null_category
@@ -113,10 +113,8 @@ class Bernoulli(Likelihood):
         .. Note:
             Each y_i must be in {0, 1}
         """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
         #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
-        objective = np.where(y, inv_link_f, 1.-inv_link_f)
-        return np.exp(np.sum(np.log(objective)))
+        return np.where(y, inv_link_f, 1.-inv_link_f)
 
     def logpdf_link(self, inv_link_f, y, Y_metadata=None):
         """
@@ -133,13 +131,9 @@ class Bernoulli(Likelihood):
         :returns: log likelihood evaluated at points inverse link of f.
         :rtype: float
         """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
         #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
-        state = np.seterr(divide='ignore')
-        # TODO check y \in {0, 1} or {-1, 1}
-        objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
-        np.seterr(**state)
-        return np.sum(objective)
+        p = np.where(y==1, inv_link_f, 1.-inv_link_f)
+        return np.log(p)
 
     def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
         """
@@ -156,13 +150,10 @@ class Bernoulli(Likelihood):
         :returns: gradient of log likelihood evaluated at points inverse link of f.
         :rtype: Nx1 array
         """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
         #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
-        state = np.seterr(divide='ignore')
-        # TODO check y \in {0, 1} or {-1, 1}
-        grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
-        np.seterr(**state)
-        return grad
+        #grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
+        denom = np.where(y, inv_link_f, -(1-inv_link_f))
+        return 1./denom
 
     def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
         """
@@ -185,13 +176,13 @@ class Bernoulli(Likelihood):
         Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
         (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
         """
-        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
         #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
-        state = np.seterr(divide='ignore')
-        # TODO check y \in {0, 1} or {-1, 1}
-        d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
-        np.seterr(**state)
-        return d2logpdf_dlink2
+        #d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
+        arg = np.where(y, inv_link_f, 1.-inv_link_f)
+        ret = -1./np.square(np.clip(arg, 1e-3, np.inf))
+        if np.any(np.isinf(ret)):
+            stop
+        return ret
 
     def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
         """
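The clipped form above bounds the Hessian diagonal as the predicted probability saturates towards 0 or 1. A short standalone numeric illustration (not GPy code) of the effect of the clip:

import numpy as np

y = np.array([1, 1, 0])
p = np.array([1e-6, 0.5, 1.0 - 1e-6])   # inverse-link values, nearly saturated at the ends

arg = np.where(y, p, 1. - p)
unclipped = -1. / np.square(arg)                       # explodes to about -1e12 at the ends
clipped = -1. / np.square(np.clip(arg, 1e-3, np.inf))  # bounded below at -1e6

print(unclipped.min(), clipped.min())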
@@ -131,6 +131,56 @@ class Likelihood(Parameterized):
 
         return z, mean, variance
 
+    def variational_expectations(self, Y, m, v, gh_points=None):
+        """
+        Use Gauss-Hermite Quadrature to compute
+
+           E_p(f) [ log p(y|f) ]
+           d/dm E_p(f) [ log p(y|f) ]
+           d/dv E_p(f) [ log p(y|f) ]
+
+        where p(f) is a Gaussian with mean m and variance v. The shapes of Y, m and v should match.
+
+        if no gh_points are passed, we construct them using defualt options
+        """
+
+        if gh_points is None:
+            gh_x, gh_w = np.polynomial.hermite.hermgauss(12)
+        else:
+            gh_x, gh_w = gh_points
+
+        shape = m.shape
+        m,v,Y = m.flatten(), v.flatten(), Y.flatten()
+
+        #make a grid of points
+        X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
+
+        #evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
+        # broadcast needs to be handled carefully.
+        logp = self.logpdf(X,Y[:,None])
+        dlogp_dx = self.dlogpdf_df(X, Y[:,None])
+        d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
+
+        #clipping for numerical stability
+        logp = np.clip(logp,-1e6,1e6)
+        dlogp_dx = np.clip(dlogp_dx,-1e6,1e6)
+        d2logp_dx2 = np.clip(d2logp_dx2,-1e6,1e6)
+
+        #average over the gird to get derivatives of the Gaussian's parameters
+        F = np.dot(logp, gh_w)
+        dF_dm = np.dot(dlogp_dx, gh_w)
+        dF_dv = np.dot(d2logp_dx2, gh_w)/2.
+
+        if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
+            stop
+        if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
+            stop
+
+        return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape)
+
+
+
     def predictive_mean(self, mu, variance, Y_metadata=None):
         """
         Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
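As a standalone illustration of the Gauss-Hermite rule the method above relies on (not GPy code): for a Gaussian likelihood with variance s2, the quadrature estimate of E_{f~N(m,v)}[log p(y|f)] can be checked against the closed form -0.5*log(2*pi*s2) - 0.5*((y-m)^2 + v)/s2.

import numpy as np

m, v = 0.3, 0.5          # mean and variance of p(f)
y, s2 = 1.2, 0.1         # observation and Gaussian likelihood variance

gh_x, gh_w = np.polynomial.hermite.hermgauss(12)
gh_w = gh_w / np.sqrt(np.pi)              # normalise weights for an expectation under N(m, v)
f = gh_x * np.sqrt(2. * v) + m            # quadrature nodes mapped to N(m, v)

logp = -0.5 * np.log(2 * np.pi * s2) - 0.5 * (y - f) ** 2 / s2
quad = np.dot(logp, gh_w)

exact = -0.5 * np.log(2 * np.pi * s2) - 0.5 * ((y - m) ** 2 + v) / s2
print(np.allclose(quad, exact))  # True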
@@ -71,7 +71,6 @@ class Probit(GPTransformation):
 
     g(f) = \\Phi^{-1} (mu)
 
-
     """
     def transf(self,f):
         return std_norm_cdf(f)
@@ -88,6 +87,34 @@ class Probit(GPTransformation):
         f2 = f**2
         return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
 
+
+class Cloglog(GPTransformation):
+    """
+    Complementary log-log link
+    .. math::
+
+        p(f) = 1 - e^{-e^f}
+
+    or
+
+        f = \log (-\log(1-p))
+
+    """
+    def transf(self,f):
+        return 1-np.exp(-np.exp(f))
+
+    def dtransf_df(self,f):
+        return np.exp(f-np.exp(f))
+
+    def d2transf_df2(self,f):
+        ef = np.exp(f)
+        return -np.exp(f-ef)*(ef-1.)
+
+    def d3transf_df3(self,f):
+        ef = np.exp(f)
+        return np.exp(f-ef)*(1.-3*ef + ef**2)
+
+
 class Log(GPTransformation):
     """
     .. math::
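A quick standalone finite-difference check of the first derivative of the complementary log-log transformation defined above (a sketch, not part of the patch):

import numpy as np

def transf(f):
    return 1 - np.exp(-np.exp(f))

def dtransf_df(f):
    # d/df [1 - exp(-exp(f))] = exp(f - exp(f))
    return np.exp(f - np.exp(f))

f = np.linspace(-3, 2, 7)
eps = 1e-6
numeric = (transf(f + eps) - transf(f - eps)) / (2 * eps)
print(np.allclose(numeric, dtransf_df(f), atol=1e-6))  # True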
@@ -1,47 +0,0 @@
-# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-try:
-    import sympy as sym
-    sympy_available=True
-    from sympy.utilities.lambdify import lambdify
-    from GPy.util.symbolic import gammaln, logisticln
-except ImportError:
-    sympy_available=False
-
-import numpy as np
-import link_functions
-from symbolic import Symbolic
-from scipy import stats
-
-class Negative_binomial(Symbolic):
-    """
-    Negative binomial
-
-    .. math::
-        p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
-
-    .. Note::
-        Y takes non zero integer values..
-        link function should have a positive domain, e.g. log (default).
-
-    .. See also::
-        symbolic.py, for the parent class
-    """
-    def __init__(self, gp_link=None, dispersion=1.0):
-        parameters = {'dispersion':dispersion}
-        if gp_link is None:
-            gp_link = link_functions.Identity()
-
-        dispersion = sym.Symbol('dispersion', positive=True, real=True)
-        y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
-        f_0 = sym.Symbol('f_0', positive=True, real=True)
-        gp_link = link_functions.Log()
-        log_pdf=dispersion*sym.log(dispersion) - (dispersion+y_0)*sym.log(dispersion+f_0) + gammaln(y_0+dispersion) - gammaln(y_0+1) - gammaln(dispersion) + y_0*sym.log(f_0)
-        #log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
-        super(Negative_binomial, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Negative_binomial')
-
-        # TODO: Check this.
-        self.log_concave = False
-
@@ -5,7 +5,6 @@ from __future__ import division
 import numpy as np
 from scipy import stats,special
 import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 import link_functions
 from likelihood import Likelihood
 
@@ -1,45 +0,0 @@
-# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import sympy as sym
-from GPy.util.symbolic import normcdfln
-import numpy as np
-from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
-#from GPy.util.functions import clip_exp
-import link_functions
-from symbolic import Symbolic
-from scipy import stats
-
-class Skew_exponential(Symbolic):
-    """
-    Negative binomial
-
-    .. math::
-
-    .. Note::
-        Y takes real values.
-        link function is identity
-
-    .. See also::
-        symbolic.py, for the parent class
-    """
-    def __init__(self, gp_link=None, shape=1.0, scale=1.0):
-        parameters={'scale':scale, 'shape':shape}
-        if gp_link is None:
-            gp_link = link_functions.Identity()
-
-        #func_modules = [{'exp':clip_exp}]
-
-        scale = sym.Symbol('scale', positive=True, real=True)
-        shape = sym.Symbol('shape', real=True)
-        y_0 = sym.Symbol('y_0', real=True)
-        f_0 = sym.Symbol('f_0', real=True)
-        log_pdf=sym.log(shape)-sym.log(scale)-((y_0-f_0)/scale) + normcdfln(shape*(y_0-f_0)/scale)
-        super(Skew_exponential, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Skew_exponential', parameters=parameters)
-
-        # TODO: Check this.
-        self.log_concave = True
-
-
@@ -1,52 +0,0 @@
-# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-try:
-    import sympy as sym
-    sympy_available=True
-    from sympy.utilities.lambdify import lambdify
-    from GPy.util.symbolic import normcdfln, normcdf
-except ImportError:
-    sympy_available=False
-
-import numpy as np
-from GPy.util.functions import clip_exp
-import link_functions
-from symbolic import Symbolic
-from scipy import stats
-
-class Skew_normal(Symbolic):
-    """
-    Skew Normal distribution.
-
-    .. math::
-
-    .. Note::
-        Y takes real values.
-        link function is identity
-
-    .. See also::
-        symbolic.py, for the parent class
-    """
-    def __init__(self, gp_link=None, shape=1.0, scale=1.0):
-        parameters = {'scale': scale, 'shape':shape}
-        if gp_link is None:
-            gp_link = link_functions.Identity()
-
-        # # this likelihood has severe problems with likelihoods saturating exponentials, so clip_exp is used in place of the true exp as a solution for dealing with the numerics.
-        # func_modules = [{'exp':clip_exp}]
-        func_modules = []
-
-        scale = sym.Symbol('scale', positive=True, real=True)
-        shape = sym.Symbol('shape', real=True)
-        y_0 = sym.Symbol('y_0', real=True)
-        f_0 = sym.Symbol('f_0', real=True)
-        log_pdf=-sym.log(scale)-1./2*sym.log(2*sym.pi)-1./2*((y_0-f_0)/scale)**2 + sym.log(2) + normcdfln(shape*(y_0-f_0)/scale)
-        super(Skew_normal, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Skew_normal', func_modules=func_modules)
-
-        self.log_concave = True
-
-
@@ -1,45 +0,0 @@
-# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import sympy as sym
-from sympy.utilities.lambdify import lambdify
-from GPy.util.symbolic import gammaln
-
-import numpy as np
-import link_functions
-from symbolic import Symbolic
-from scipy import stats
-
-class SstudentT(Symbolic):
-    """
-    Symbolic variant of the Student-t distribution.
-
-    .. math::
-
-    .. Note::
-        Y takes real values.
-        link function is identity
-
-    .. See also::
-        symbolic.py, for the parent class
-    """
-    def __init__(self, gp_link=None, deg_free=5.0, t_scale2=1.0):
-        parameters = {'deg_free':5.0, 't_scale2':1.0}
-        if gp_link is None:
-            gp_link = link_functions.Identity()
-
-        # this likelihood has severe problems with likelihoods saturating ...
-        y_0 = sym.Symbol('y_0', real=True)
-        f_0 = sym.Symbol('f_0', real=True)
-        nu = sym.Symbol('nu', positive=True, real=True)
-        t_scale2 = sym.Symbol('t_scale2', positive=True, real=True)
-        log_pdf = (gammaln((nu + 1) * 0.5)
-                   - gammaln(nu * 0.5)
-                   - 0.5*sym.log(t_scale2 * nu * sym.pi)
-                   - 0.5*(nu + 1)*sym.log(1 + (1/nu)*(((y_0-f_0)**2)/t_scale2)))
-        super(SstudentT, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='SstudentT')
-        self.log_concave = False
-
-
@@ -1,279 +0,0 @@
-# Copyright (c) 2014 GPy Authors
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-import sympy as sym
-import numpy as np
-from likelihood import Likelihood
-from ..core.symbolic import Symbolic_core
-
-
-class Symbolic(Likelihood, Symbolic_core):
-    """
-    Symbolic likelihood.
-
-    Likelihood where the form of the likelihood is provided by a sympy expression.
-
-    """
-    def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, parameters=None, func_modules=[]):
-
-        if gp_link is None:
-            gp_link = link_functions.Identity()
-
-        if log_pdf is None:
-            raise ValueError, "You must provide an argument for the log pdf."
-
-        Likelihood.__init__(self, gp_link, name=name)
-        functions = {'log_pdf':log_pdf}
-        self.cacheable = ['F', 'Y']
-
-        self.missing_data = False
-        if missing_log_pdf:
-            self.missing_data = True
-            functions['missing_log_pdf']=missing_log_pdf
-
-        self.ep_analytic = False
-        if logZ:
-            self.ep_analytic = True
-            functions['logZ'] = logZ
-            self.cacheable += ['M', 'V']
-
-        Symbolic_core.__init__(self, functions, cacheable=self.cacheable, derivatives = ['F', 'theta'], parameters=parameters, func_modules=func_modules)
-
-        # TODO: Is there an easy way to check whether the pdf is log
-        self.log_concave = log_concave
-
-
-
-    def _set_derivatives(self, derivatives):
-        # these are arguments for computing derivatives.
-        print "Whoop"
-        Symbolic_core._set_derivatives(self, derivatives)
-
-        # add second and third derivatives for Laplace approximation.
-        derivative_arguments = []
-        if derivatives is not None:
-            for derivative in derivatives:
-                derivative_arguments += self.variables[derivative]
-        exprs = ['log_pdf']
-        if self.missing_data:
-            exprs.append('missing_log_pdf')
-        for expr in exprs:
-            self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative']['f_0'], theta)) for theta in derivative_arguments}
-            self.expressions[expr]['third_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['second_derivative']['f_0'], theta)) for theta in derivative_arguments}
-        if self.ep_analytic:
-            derivative_arguments = [M]
-            # add second derivative for EP
-            exprs = ['logZ']
-            if self.missing_data:
-                exprs.append('missing_logZ')
-            for expr in exprs:
-                self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative'], theta)) for theta in derivative_arguments}
-
-
-    def eval_update_cache(self, Y, **kwargs):
-        # TODO: place checks for inf/nan in here
-        # for all provided keywords
-        Symbolic_core.eval_update_cache(self, Y=Y, **kwargs)
-        # Y = np.atleast_2d(Y)
-        # for variable, code in sorted(self.code['parameters_changed'].iteritems()):
-        #     self._set_attribute(variable, eval(code, self.namespace))
-        # for i, theta in enumerate(self.variables['Y']):
-        #     missing = np.isnan(Y[:, i])
-        #     self._set_attribute('missing_' + str(i), missing)
-        #     self._set_attribute(theta.name, value[missing, i][:, None])
-        # for variable, value in kwargs.items():
-        #     # update their cached values
-        #     if value is not None:
-        #         if variable == 'F' or variable == 'M' or variable == 'V' or variable == 'Y_metadata':
-        #             for i, theta in enumerate(self.variables[variable]):
-        #                 self._set_attribute(theta.name, value[:, i][:, None])
-        #         else:
-        #             self._set_attribute(theta.name, value[:, i])
-        # for variable, code in sorted(self.code['update_cache'].iteritems()):
-        #     self._set_attribute(variable, eval(code, self.namespace))
-
-
-    def parameters_changed(self):
-        pass
-
-    def update_gradients(self, grads):
-        """
-        Pull out the gradients, be careful as the order must match the order
-        in which the parameters are added
-        """
-        # The way the Laplace approximation is run requires the
-        # covariance function to compute the true gradient (because it
-        # is dependent on the mode). This means we actually compute
-        # the gradient outside this object. This function would
-        # normally ask the object to update its gradients internally,
-        # but here it provides them externally, because they are
-        # computed in the inference code. TODO: Thought: How does this
-        # effect EP? Shouldn't this be done by a separate
-        # Laplace-approximation specific call?
-        for theta, grad in zip(self.variables['theta'], grads):
-            parameter = getattr(self, theta.name)
-            setattr(parameter, 'gradient', grad)
-
-    def pdf_link(self, f, y, Y_metadata=None):
-        """
-        Likelihood function given inverse link of f.
-
-        :param f: inverse link of latent variables.
-        :type f: Nx1 array
-        :param y: data
-        :type y: Nx1 array
-        :param Y_metadata: Y_metadata which is not used in student t distribution
-        :returns: likelihood evaluated for this point
-        :rtype: float
-        """
-        return np.exp(self.logpdf_link(f, y, Y_metadata=None))
-
-    def logpdf_link(self, f, y, Y_metadata=None):
-        """
-        Log Likelihood Function given inverse link of latent variables.
-
-        :param f: latent variables (inverse link of f)
-        :type f: Nx1 array
-        :param y: data
-        :type y: Nx1 array
-        :param Y_metadata: Y_metadata
-        :returns: likelihood evaluated for this point
-        :rtype: float
-
-        """
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        if self.missing_data:
-            missing_flag = np.isnan(y)
-            not_missing_flag = np.logical_not(missing_flag)
-            ll = self.eval_function('missing_log_pdf', F=f[missing_flag]).sum()
-            ll += self.eval_function('log_pdf', F=f[not_missing_flag], Y=y[not_missing_flag], Y_metadata=Y_metadata[not_missing_flag]).sum()
-        else:
-            ll = self.eval_function('log_pdf', F=f, Y=y, Y_metadata=Y_metadata).sum()
-
-        return ll
-
-    def dlogpdf_dlink(self, f, y, Y_metadata=None):
-        """
-        Gradient of log likelihood with respect to the inverse link function.
-
-        :param f: latent variables (inverse link of f)
-        :type f: Nx1 array
-        :param y: data
-        :type y: Nx1 array
-        :param Y_metadata: Y_metadata
-        :returns: gradient of likelihood with respect to each point.
-        :rtype: Nx1 array
-
-        """
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
-        if self.missing_data:
-            return np.where(np.isnan(y),
-                            eval(self.code['missing_log_pdf']['derivative']['f_0'], self.namespace),
-                            eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
-        else:
-            return np.where(np.isnan(y),
-                            0.,
-                            eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
-
-    def d2logpdf_dlink2(self, f, y, Y_metadata=None):
-        """
-        Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
-        i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
-
-
-        :param f: inverse link of the latent variables.
-        :type f: Nx1 array
-        :param y: data
-        :type y: Nx1 array
-        :param Y_metadata: Y_metadata which is not used in student t distribution
-        :returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
-        :rtype: Nx1 array
-
-        .. Note::
-            Returns diagonal of Hessian, since every where else it is
-            0, as the likelihood factorizes over cases (the
-            distribution for y_i depends only on link(f_i) not on
-            link(f_(j!=i))
-        """
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
-        if self.missing_data:
-            return np.where(np.isnan(y),
-                            eval(self.code['missing_log_pdf']['second_derivative']['f_0'], self.namespace),
-                            eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
-        else:
-            return np.where(np.isnan(y),
-                            0.,
-                            eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
-
-    def d3logpdf_dlink3(self, f, y, Y_metadata=None):
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
-        if self.missing_data:
-            return np.where(np.isnan(y),
-                            eval(self.code['missing_log_pdf']['third_derivative']['f_0'], self.namespace),
-                            eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
-        else:
-            return np.where(np.isnan(y),
-                            0.,
-                            eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
-
-    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
-        g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
-        for i, theta in enumerate(self.variables['theta']):
-            if self.missing_data:
-                g[:, i:i+1] = np.where(np.isnan(y),
-                                       eval(self.code['missing_log_pdf']['derivative'][theta.name], self.namespace),
-                                       eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
-            else:
-                g[:, i:i+1] = np.where(np.isnan(y),
-                                       0.,
-                                       eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
-        return g.sum(0)
-
-    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
-        g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
-        for i, theta in enumerate(self.variables['theta']):
-            if self.missing_data:
-                g[:, i:i+1] = np.where(np.isnan(y),
-                                       eval(self.code['missing_log_pdf']['second_derivative'][theta.name], self.namespace),
-                                       eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
-            else:
-                g[:, i:i+1] = np.where(np.isnan(y),
-                                       0.,
-                                       eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
-        return g
-
-    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
-        assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
-        self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
-        g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
-        for i, theta in enumerate(self.variables['theta']):
-            if self.missing_data:
-                g[:, i:i+1] = np.where(np.isnan(y),
-                                       eval(self.code['missing_log_pdf']['third_derivative'][theta.name], self.namespace),
-                                       eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
-            else:
-                g[:, i:i+1] = np.where(np.isnan(y),
-                                       0.,
-                                       eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
-        return g
-
-    def predictive_mean(self, mu, sigma, Y_metadata=None):
-        raise NotImplementedError
-
-    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
-        raise NotImplementedError
-
-    def conditional_mean(self, gp):
-        raise NotImplementedError
-
-    def conditional_variance(self, gp):
-        raise NotImplementedError
-
-    def samples(self, gp, Y_metadata=None):
-        raise NotImplementedError
-
@@ -5,13 +5,3 @@ from kernel import Kernel
 from linear import Linear
 from mlp import MLP
 #from rbf import RBF
-# TODO need to fix this in a config file.
-try:
-    import sympy as sym
-    sympy_available=True
-except ImportError:
-    sympy_available=False
-
-if sympy_available:
-    # These are likelihoods that rely on symbolic.
-    from symbolic import Symbolic
@@ -1,57 +0,0 @@
-# Copyright (c) 2014 GPy Authors
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import sympy as sym
-from ..core.mapping import Mapping, Bijective_mapping
-from ..core.symbolic import Symbolic_core
-import numpy as np
-
-class Symbolic(Mapping, Symbolic_core):
-    """
-    Symbolic mapping
-
-    Mapping where the form of the mapping is provided by a sympy expression.
-
-    """
-    def __init__(self, input_dim, output_dim, f=None, name='symbolic', parameters=None, func_modules=[]):
-
-
-        if f is None:
-            raise ValueError, "You must provide an argument for the function."
-
-        Mapping.__init__(self, input_dim, output_dim, name=name)
-        Symbolic_core.__init__(self, {'f': f}, ['X'], derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
-
-        self._initialize_cache()
-        self.parameters_changed()
-
-    def _initialize_cache(self):
-        self._set_attribute('x_0', np.random.normal(size=(3, self.input_dim)))
-
-
-    def parameters_changed(self):
-        self.eval_parameters_changed()
-
-    def update_cache(self, X=None):
-        self.eval_update_cache(X=X)
-
-    def update_gradients(self, partial, X=None):
-        for name, val in self.eval_update_gradients('f', partial, X=X).iteritems():
-            setattr(getattr(self, name), 'gradient', val)
-
-    def gradients_X(self, partial, X=None):
-        return self.eval_gradients_X('f', partial, X=X)
-
-    def f(self, X=None):
-        """
-        """
-        return self.eval_function('f', X=X)
-
-
-    def df_dX(self, X):
-        """
-        """
-        pass
-
-    def df_dtheta(self, X):
-        pass
-
@@ -5,10 +5,8 @@ import numpy as np
 from .. import kern
 from ..core.sparse_gp_mpi import SparseGP_MPI
 from ..likelihoods import Gaussian
-from ..inference.optimization import SCG
-from ..util import linalg
-from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
-from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
+from ..core.parameterization.variational import NormalPosterior, NormalPrior
+from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
 from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
 import logging
 
@@ -25,12 +23,14 @@ class BayesianGPLVM(SparseGP_MPI):
     """
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
-                 Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None):
+                 Z=None, kernel=None, inference_method=None, likelihood=None,
+                 name='bayesian gplvm', mpi_comm=None, normalizer=None,
+                 missing_data=False, stochastic=False, batchsize=1):
         self.mpi_comm = mpi_comm
         self.__IN_OPTIMIZATION__ = False
 
         self.logger = logging.getLogger(self.__class__.__name__)
-        if X == None:
+        if X is None:
             from ..util.initialization import initialize_latent
             self.logger.info("initializing latent space X with method {}".format(init))
             X, fracs = initialize_latent(init, input_dim, Y)
@@ -59,24 +59,24 @@ class BayesianGPLVM(SparseGP_MPI):
             X = NormalPosterior(X, X_variance)
 
         if inference_method is None:
-            inan = np.isnan(Y)
-            if np.any(inan):
-                from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
-                self.logger.debug("creating inference_method with var_dtc missing data")
-                inference_method = VarDTCMissingData(inan=inan)
-            elif mpi_comm is not None:
+            if mpi_comm is not None:
                 inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
             else:
                 from ..inference.latent_function_inference.var_dtc import VarDTC
                 self.logger.debug("creating inference_method var_dtc")
-                inference_method = VarDTC()
+                inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
         if isinstance(inference_method,VarDTC_minibatch):
             inference_method.mpi_comm = mpi_comm
 
         if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
             kernel.psicomp.GPU_direct = True
 
-        super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood, name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior)
+        super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
+                                           name=name, inference_method=inference_method,
+                                           normalizer=normalizer, mpi_comm=mpi_comm,
+                                           variational_prior=self.variational_prior,
+                                           missing_data=missing_data, stochastic=stochastic,
+                                           batchsize=batchsize)
 
     def set_X_gradients(self, X, X_grad):
         """Set the gradients of the posterior distribution of X in its specific form."""
@@ -86,15 +86,62 @@ class BayesianGPLVM(SparseGP_MPI):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient
 
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
+        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
+
+        kl_fctr = 1.
+        if self.missing_data:
+            d = self.output_dim
+            log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
+        else:
+            log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
+
+        current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
+                                            variational_posterior=X,
+                                            Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
+                                            dL_dpsi1=grad_dict['dL_dpsi1'],
+                                            dL_dpsi2=grad_dict['dL_dpsi2'])
+
+        # Subsetting Variational Posterior objects, makes the gradients
+        # empty. We need them to be 0 though:
+        X.mean.gradient[:] = 0
+        X.variance.gradient[:] = 0
+
+        self.variational_prior.update_gradients_KL(X)
+        if self.missing_data:
+            current_values['meangrad'] += kl_fctr*X.mean.gradient/d
+            current_values['vargrad'] += kl_fctr*X.variance.gradient/d
+        else:
+            current_values['meangrad'] += kl_fctr*X.mean.gradient
+            current_values['vargrad'] += kl_fctr*X.variance.gradient
+
+        if subset_indices is not None:
+            value_indices['meangrad'] = subset_indices['samples']
+            value_indices['vargrad'] = subset_indices['samples']
+        return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
+
+    def _outer_values_update(self, full_values):
+        """
+        Here you put the values, which were collected before in the right places.
+        E.g. set the gradients of parameters, etc.
+        """
+        super(BayesianGPLVM, self)._outer_values_update(full_values)
+        self.X.mean.gradient = full_values['meangrad']
+        self.X.variance.gradient = full_values['vargrad']
+
+    def _outer_init_full_values(self):
+        return dict(meangrad=np.zeros(self.X.mean.shape),
+                    vargrad=np.zeros(self.X.variance.shape))
+
     def parameters_changed(self):
         super(BayesianGPLVM,self).parameters_changed()
         if isinstance(self.inference_method, VarDTC_minibatch):
             return
 
-        super(BayesianGPLVM, self).parameters_changed()
-        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
+        #super(BayesianGPLVM, self).parameters_changed()
+        #self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
 
-        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
+        #self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
 
         # This is testing code -------------------------
         # i = np.random.randint(self.X.shape[0])
@@ -113,7 +160,7 @@ class BayesianGPLVM(SparseGP_MPI):
         # -----------------------------------------------
 
         # update for the KL divergence
-        self.variational_prior.update_gradients_KL(self.X)
+        #self.variational_prior.update_gradients_KL(self.X)
 
     def plot_latent(self, labels=None, which_indices=None,
                     resolution=50, ax=None, marker='o', s=40,
@@ -178,7 +225,7 @@ class BayesianGPLVM(SparseGP_MPI):
         """
         dmu_dX = np.zeros_like(Xnew)
         for i in range(self.Z.shape[0]):
-            dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
+            dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :])
         return dmu_dX
 
     def dmu_dXnew(self, Xnew):
@@ -189,7 +236,7 @@ class BayesianGPLVM(SparseGP_MPI):
         ones = np.ones((1, 1))
         for i in range(self.Z.shape[0]):
             gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
-        return np.dot(gradients_X, self.Cpsi1Vf)
+        return np.dot(gradients_X, self.grad_dict['dL_dpsi1'])
 
     def plot_steepest_gradient_map(self, *args, ** kwargs):
         """
@@ -352,8 +352,8 @@ class TestNoiseModels(object):
             print model
             #print model._get_params()
             np.testing.assert_almost_equal(
-                    model.pdf(f.copy(), Y.copy()),
-                    np.exp(model.logpdf(f.copy(), Y.copy()))
+                    model.pdf(f.copy(), Y.copy()).prod(),
+                    np.exp(model.logpdf(f.copy(), Y.copy()).sum())
                     )
 
         @with_setup(setUp, tearDown)
@@ -20,7 +20,7 @@ class MiscTests(unittest.TestCase):
         m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
         m.randomize()
         m.likelihood.variance = .5
-        Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.likelihood.variance)
+        Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N) * m.likelihood.variance)
         K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
         mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized)
 
@@ -43,7 +43,7 @@ class MiscTests(unittest.TestCase):
         Z = m.Z[:]
         X = self.X[:]
 
-        #Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
+        # Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
         Kinv = m.posterior.woodbury_inv
         K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
 
@@ -51,13 +51,13 @@ class MiscTests(unittest.TestCase):
         self.assertEquals(mu.shape, (self.N_new, self.D))
         self.assertEquals(covar.shape, (self.N_new, self.N_new))
         np.testing.assert_almost_equal(K_hat, covar)
-        #np.testing.assert_almost_equal(mu_hat, mu)
+        # np.testing.assert_almost_equal(mu_hat, mu)
 
         mu, var = m._raw_predict(self.X_new)
         self.assertEquals(mu.shape, (self.N_new, self.D))
         self.assertEquals(var.shape, (self.N_new, 1))
         np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
-        #np.testing.assert_almost_equal(mu_hat, mu)
+        # np.testing.assert_almost_equal(mu_hat, mu)
 
     def test_likelihood_replicate(self):
         m = GPy.models.GPRegression(self.X, self.Y)
@@ -110,22 +110,45 @@ class MiscTests(unittest.TestCase):
        m2.kern.lengthscale = m.kern['.*lengthscale']
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

+    def test_missing_data(self):
+        from GPy import kern
+        from GPy.models import BayesianGPLVM
+        from GPy.examples.dimensionality_reduction import _simulate_matern
+
+        D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
+        _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
+        Y = Ylist[0]
+
+        inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # 80% missing data
+        Ymissing = Y.copy()
+        Ymissing[inan] = np.nan
+
+        k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
+        m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
+                          kernel=k, missing_data=True)
+        assert(m.checkgrad())
+
+        k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
+        m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
+                          kernel=k, missing_data=True)
+        assert(m.checkgrad())
+
    def test_likelihood_replicate_kern(self):
        m = GPy.models.GPRegression(self.X, self.Y)
        m2 = GPy.models.GPRegression(self.X, self.Y)
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
        m.kern.randomize()
        m2.kern[''] = m.kern[:]
-       np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+       np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
        m.kern.randomize()
        m2.kern[:] = m.kern[:]
-       np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+       np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
        m.kern.randomize()
        m2.kern[''] = m.kern['']
-       np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+       np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
        m.kern.randomize()
        m2.kern[:] = m.kern[''].values()
-       np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+       np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())

    def test_big_model(self):
        m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)

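The new `test_missing_data` case exercises `BayesianGPLVM` with `missing_data=True` on a matrix whose missing entries are marked as NaN. A condensed sketch of the same usage, assuming this version of GPy; random data replaces the `_simulate_matern` helper so the snippet is self-contained, and the sizes are smaller than in the test:

```python
import numpy as np
from GPy import kern
from GPy.models import BayesianGPLVM

N, D, Q, num_inducing = 50, 8, 3, 5
Y = np.random.randn(N, D)

# mark a large fraction of the entries as missing
inan = np.random.binomial(1, .7, size=Y.shape).astype(bool)
Ymissing = Y.copy()
Ymissing[inan] = np.nan

k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2))
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
                  kernel=k, missing_data=True)
assert m.checkgrad()
```
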
@@ -158,7 +181,7 @@ class MiscTests(unittest.TestCase):
    def test_model_optimize(self):
        X = np.random.uniform(-3., 3., (20, 1))
        Y = np.sin(X) + np.random.randn(20, 1) * 0.05
-       m = GPy.models.GPRegression(X,Y)
+       m = GPy.models.GPRegression(X, Y)
        m.optimize()
        print m

@@ -219,8 +242,8 @@ class GradientTests(np.testing.TestCase):
        mlp = GPy.kern.MLP(1)
        self.check_model(mlp, model_type='GPRegression', dimension=1)

-   #TODO:
-   #def test_GPRegression_poly_1d(self):
+   # TODO:
+   # def test_GPRegression_poly_1d(self):
    #     ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
    #     mlp = GPy.kern.Poly(1, degree=5)
    #     self.check_model(mlp, model_type='GPRegression', dimension=1)

@@ -417,11 +440,11 @@ class GradientTests(np.testing.TestCase):

    def test_gp_heteroscedastic_regression(self):
        num_obs = 25
-       X = np.random.randint(0,140,num_obs)
-       X = X[:,None]
-       Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None]
+       X = np.random.randint(0, 140, num_obs)
+       X = X[:, None]
+       Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
        kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
-       m = GPy.models.GPHeteroscedasticRegression(X,Y,kern)
+       m = GPy.models.GPHeteroscedasticRegression(X, Y, kern)
        self.assertTrue(m.checkgrad())

    def test_gp_kronecker_gaussian(self):

@@ -432,12 +455,12 @@ class GradientTests(np.testing.TestCase):
        k1 = GPy.kern.RBF(1) # + GPy.kern.White(1)
        k2 = GPy.kern.RBF(1) # + GPy.kern.White(1)
        Y = np.random.randn(N1, N2)
-       Y = Y-Y.mean(0)
-       Y = Y/Y.std(0)
+       Y = Y - Y.mean(0)
+       Y = Y / Y.std(0)
        m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)

        # build the model the dumb way
-       assert (N1*N2<1000), "too much data for standard GPs!"
+       assert (N1 * N2 < 1000), "too much data for standard GPs!"
        yy, xx = np.meshgrid(X2, X1)
        Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
        kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])

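The "dumb way" above rebuilds the Kronecker model as an ordinary GP on the full N1*N2 grid, using a product kernel with one RBF per grid axis, so the two models can be compared. A sketch of that construction under the same assumptions (small grid, default kernel and noise parameters); whether the two log-likelihoods agree exactly depends on both models carrying identical parameters:

```python
import numpy as np
import GPy

N1, N2 = 10, 12
X1, X2 = np.random.randn(N1, 1), np.random.randn(N2, 1)
Y = np.random.randn(N1, N2)
Y = (Y - Y.mean(0)) / Y.std(0)

# Kronecker-structured model over the two grid axes
k1, k2 = GPy.kern.RBF(1), GPy.kern.RBF(1)
m_kron = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)

# the same data as one standard GP on all N1*N2 grid points
yy, xx = np.meshgrid(X2, X1)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
# flatten Y in the same (Fortran) order as the grid points
m_full = GPy.models.GPRegression(Xgrid, Y.reshape(-1, 1, order='F'), kg)

print(m_kron.log_likelihood())
print(m_full.log_likelihood())
```
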
@@ -458,11 +481,11 @@ class GradientTests(np.testing.TestCase):

    def test_gp_VGPC(self):
        num_obs = 25
-       X = np.random.randint(0,140,num_obs)
-       X = X[:,None]
-       Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None]
+       X = np.random.randint(0, 140, num_obs)
+       X = X[:, None]
+       Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
        kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
-       m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern)
+       m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
        self.assertTrue(m.checkgrad())

@@ -18,12 +18,3 @@ import multioutput
import linalg_gpu
import mpi

-try:
-    import sympy
-    _sympy_available = True
-    del sympy
-except ImportError as e:
-    _sympy_available = False
-
-if _sympy_available:
-    import symbolic

@@ -188,4 +188,6 @@ class Cache_this(object):
        self.ignore_args = ignore_args
        self.force_args = force_kwargs
    def __call__(self, f):
-       return Cacher_wrap(f, self.limit, self.ignore_args, self.force_args)
+       newf = Cacher_wrap(f, self.limit, self.ignore_args, self.force_args)
+       update_wrapper(newf, f)
+       return newf

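Wrapping the cached function with `update_wrapper(newf, f)` before returning it keeps the decorated function's `__name__` and `__doc__` visible on the wrapper. A minimal sketch of the same pattern with a toy caching decorator (not the GPy `Cacher_wrap` class itself):

```python
from functools import update_wrapper

class memoize(object):
    """Toy decorator that caches results by positional arguments."""
    def __call__(self, f):
        cache = {}
        def newf(*args):
            if args not in cache:
                cache[args] = f(*args)
            return cache[args]
        # copy __name__, __doc__, etc. from f onto the wrapper
        update_wrapper(newf, f)
        return newf

@memoize()
def kernel_diag(n):
    """Return the diagonal of an n x n identity kernel."""
    return [1.0] * n

print(kernel_diag.__name__)  # 'kernel_diag', not 'newf'
print(kernel_diag.__doc__)
```
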
@@ -964,7 +964,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
    if not data_available(dataset):
        download_data(dataset)

-   from pandas import read_csv
+   from pandas import read_csv, isnull
    dir_path = os.path.join(data_path, dataset)

    # read the info .soft

@@ -978,11 +978,26 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
    sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):])
    sample_info.columns.name = sample_info.columns.name[len("!Sample_"):]
    sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']]
-   sample_info = sample_info.ix[:, np.r_[0:3, 5:sample_info.shape[1]]]
+   sample_info = sample_info.iloc[:, np.r_[0:4, 5:sample_info.shape[1]]]
    c = sample_info.columns.to_series()
    c[1:4] = ['strain', 'cross', 'developmental_stage']
    sample_info.columns = c

+   # get the labels right:
+   rep = re.compile('\(.*\)')
+   def filter_dev_stage(row):
+       if isnull(row):
+           row = "2-cell stage embryo"
+       if row.startswith("developmental stage: "):
+           row = row[len("developmental stage: "):]
+       if row == 'adult':
+           row += " liver"
+       row = row.replace(' stage ', ' ')
+       row = rep.sub(' ', row)
+       row = row.strip(' ')
+       return row
+   labels = sample_info.developmental_stage.apply(filter_dev_stage)
+
    # Extract the tar file
    filename = os.path.join(dir_path, 'GSE45719_Raw.tar')
    with tarfile.open(filename, 'r') as files:

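The added `filter_dev_stage` helper normalises the GEO "developmental stage" annotations before they are used as labels. A standalone sketch of the same clean-up; the example input is an assumption, not an actual GEO record:

```python
import re
from pandas import isnull

rep = re.compile('\(.*\)')

def filter_dev_stage(row):
    if isnull(row):
        row = "2-cell stage embryo"
    if row.startswith("developmental stage: "):
        row = row[len("developmental stage: "):]
    if row == 'adult':
        row += " liver"
    row = row.replace(' stage ', ' ')
    row = rep.sub(' ', row)   # drop parenthesised remarks
    return row.strip(' ')

print(filter_dev_stage("developmental stage: 16-cell stage embryo (early)"))  # "16-cell embryo"
```
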
@@ -1016,8 +1031,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
    sample_info.index = data.index

    # get the labels from the description
-   rep = re.compile('fibroblast|\d+-cell|embryo|liver|blastocyst|blastomere|zygote', re.IGNORECASE)
-   labels = sample_info.developmental_stage.apply(lambda row: " ".join(rep.findall(row)))
+   #rep = re.compile('fibroblast|\d+-cell|embryo|liver|early blastocyst|mid blastocyst|late blastocyst|blastomere|zygote', re.IGNORECASE)

    sys.stdout.write(' '*len(message) + '\r')
    sys.stdout.flush()

File diff suppressed because one or more lines are too long

@@ -5,15 +5,15 @@ Created on 24 Feb 2014
'''

import numpy as np
-from GPy.util.pca import pca
+from GPy.util.pca import PCA

def initialize_latent(init, input_dim, Y):
    Xr = np.asfortranarray(np.random.randn(Y.shape[0], input_dim))
    if init == 'PCA':
-       p = pca(Y)
+       p = PCA(Y)
        PC = p.project(Y, min(input_dim, Y.shape[1]))
        Xr[:PC.shape[0], :PC.shape[1]] = PC
-       var = p.fracs[:input_dim]
+       var = .1*p.fracs[:input_dim]
    else:
        var = Xr.var(0)

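With this change the PCA branch of `initialize_latent` damps the eigenvalue fractions by a factor of 0.1 when setting the initial latent variances. A sketch of that branch as a standalone function, assuming the renamed `GPy.util.pca.PCA` class from the hunk below:

```python
import numpy as np
from GPy.util.pca import PCA

def init_latent_pca(Y, input_dim):
    # random fallback for any latent dimensions PCA cannot fill
    Xr = np.random.randn(Y.shape[0], input_dim)
    p = PCA(Y)
    PC = p.project(Y, min(input_dim, Y.shape[1]))
    Xr[:PC.shape[0], :PC.shape[1]] = PC
    var = .1 * p.fracs[:input_dim]   # damped eigenvalue fractions as initial variances
    return Xr, var
```
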
@@ -11,14 +11,16 @@ try:
except:
    pass
from numpy.linalg.linalg import LinAlgError
+from operator import setitem
+import itertools

-class pca(object):
+class PCA(object):
    """
-   pca module with automatic primal/dual determination.
+   PCA module with automatic primal/dual determination.
    """
    def __init__(self, X):
-       self.mu = X.mean(0)
-       self.sigma = X.std(0)
+       self.mu = None
+       self.sigma = None

        X = self.center(X)

@@ -37,8 +39,15 @@ class pca(object):

    def center(self, X):
        """
-       Center `X` in pca space.
+       Center `X` in PCA space.
        """
+       X = X.copy()
+       inan = numpy.isnan(X)
+       if self.mu is None:
+           X_ = numpy.ma.masked_array(X, inan)
+           self.mu = X_.mean(0).base
+           self.sigma = X_.std(0).base
+       reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None)
        X = X - self.mu
        X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
        return X

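The rewritten `center` method now computes the column mean and standard deviation while ignoring NaNs via a masked array, then writes the column means back into the missing positions before whitening. A stripped-down sketch of that idea in plain NumPy, using `np.where` instead of the `reduce`/`izip` in-place trick so it also runs on Python 3:

```python
import numpy as np

def center(X):
    X = X.copy()
    inan = np.isnan(X)
    Xm = np.ma.masked_array(X, inan)
    mu = Xm.mean(0).filled(0.)       # per-column mean, NaNs ignored
    sigma = Xm.std(0).filled(1.)     # per-column std, NaNs ignored
    X = np.where(inan, mu, X)        # impute missing entries with the column mean
    return (X - mu) / np.where(sigma == 0, 1e-30, sigma)

X = np.random.randn(6, 3)
X[0, 1] = np.nan
print(center(X))  # no NaNs left; columns roughly zero mean, unit variance
```
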
@@ -57,7 +66,7 @@ class pca(object):

    def project(self, X, Q=None):
        """
-       Project X into pca space, defined by the Q highest eigenvalues.
+       Project X into PCA space, defined by the Q highest eigenvalues.
        Y = X dot V
        """
        if Q is None:

@@ -71,13 +80,16 @@ class pca(object):
        """
        Plot fractions of Eigenvalues sorted in descending order.
        """
+       from GPy.plotting.matplot_dep import Tango
+       Tango.reset()
+       col = Tango.nextMedium()
        if ax is None:
            fig = pylab.figure(fignum)
            ax = fig.add_subplot(111)
        if Q is None:
            Q = self.Q
        ticks = numpy.arange(Q)
-       bar = ax.bar(ticks - .4, self.fracs[:Q])
+       bar = ax.bar(ticks - .4, self.fracs[:Q], color=col)
        ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1))
        ax.set_ylabel("Eigenvalue fraction")
        ax.set_xlabel("PC")

@@ -1,367 +0,0 @@
-import sys
-import numpy as np
-import sympy as sym
-from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
-from sympy.matrices import Matrix
-########################################
-## Try to do some matrix functions: problem, you can't do derivatives
-## with respect to matrix functions :-(
-
-class GPySymMatrix(Matrix):
-    def __init__(self, indices):
-        Matrix.__init__(self)
-    def atoms(self):
-        return [e2 for e in self for e2 in e.atoms()]
-
-class selector(Function):
-    """A function that returns an element of a Matrix depending on input indices."""
-    nargs = 3
-    def fdiff(self, argindex=1):
-        return selector(*self.args)
-    @classmethod
-    def eval(cls, X, i, j):
-        if i.is_Number and j.is_Number:
-            return X[i, j]
-
-##################################################
-class logistic(Function):
-    """The logistic function as a symbolic function."""
-    nargs = 1
-    def fdiff(self, argindex=1):
-        x = self.args[0]
-        return logistic(x)*(1-logistic(x))
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return 1/(1+exp(-x))
-
-class logisticln(Function):
-    """The log logistic, which can often be computed with more precision than the simply taking log(logistic(x)) when x is small or large."""
-    nargs = 1
-
-    def fdiff(self, argindex=1):
-        x = self.args[0]
-        return 1-logistic(x)
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return -np.log(1+exp(-x))
-
-class erfc(Function):
-    """The complementary error function, erfc(x) = 1-erf(x). Used as a helper function, particularly for erfcx, the scaled complementary error function. and the normal distributions cdf."""
-    nargs = 1
-
-    @classmethod
-    def eval(cls, arg):
-        return 1-erf(arg)
-
-class erfcx(Function):
-    nargs = 1
-    def fdiff(self, argindex=1):
-        x = self.args[0]
-        return x*erfcx(x)-2/sqrt(pi)
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return exp(x**2)*erfc(x)
-
-class gammaln(Function):
-    """The log of the gamma function, which is often needed instead of log(gamma(x)) for better accuracy for large x."""
-    nargs = 1
-
-    def fdiff(self, argindex=1):
-        x=self.args[0]
-        return polygamma(0, x)
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return log(gamma(x))
-
-
-class normcdfln(Function):
-    """The log of the normal cdf. Can often be computed with better accuracy than log(normcdf(x)), particulary when x is either small or large."""
-    nargs = 1
-
-    def fdiff(self, argindex=1):
-        x = self.args[0]
-        #return -erfcx(-x/sqrt(2))/sqrt(2*pi)
-        #return exp(-normcdfln(x) - 0.5*x*x)/sqrt(2*pi)
-        return sqrt(2/pi)*1/erfcx(-x/sqrt(2))
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return log(normcdf(x))
-
-    def _eval_is_real(self):
-        return self.args[0].is_real
-
-class normcdf(Function):
-    """The cumulative distribution function of the standard normal. Provided as a convenient helper function. It is computed throught -0.5*erfc(-x/sqrt(2))."""
-    nargs = 1
-    def fdiff(self, argindex=1):
-        x = self.args[0]
-        return gaussian(x)
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return 0.5*(erfc(-x/sqrt(2)))
-
-    def _eval_is_real(self):
-        return self.args[0].is_real
-
-class normalln(Function):
-    """The log of the standard normal distribution."""
-    nargs = 1
-    def fdiff(self, argindex=1):
-        x = self.args[0]
-        return -x
-
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            return 0.5*sqrt(2*pi) - 0.5*x*x
-
-
-    def _eval_is_real(self):
-        return True
-
-
-class normal(Function):
-    """The standard normal distribution. Provided as a convenience function."""
-    nargs = 1
-    @classmethod
-    def eval(cls, x):
-        return 1/sqrt(2*pi)*exp(-0.5*x*x)
-
-    def _eval_is_real(self):
-        return True
-
-class differfln(Function):
-    nargs = 2
-
-    def fdiff(self, argindex=2):
-        if argindex == 2:
-            x0, x1 = self.args
-            return -2/(sqrt(pi)*(erfcx(x1)-exp(x1**2-x0**2)*erfcx(x0)))
-        elif argindex == 1:
-            x0, x1 = self.args
-            return 2/(sqrt(pi)*(exp(x0**2-x1**2)*erfcx(x1)-erfcx(x0)))
-        else:
-            raise ArgumentIndexError(self, argindex)
-
-    @classmethod
-    def eval(cls, x0, x1):
-        if x0.is_Number and x1.is_Number:
-            return log(erfc(x1)-erfc(x0))
-
-class dh_dd_i(Function):
-    nargs = 5
-    @classmethod
-    def eval(cls, t, tprime, d_i, d_j, l):
-        if (t.is_Number
-            and tprime.is_Number
-            and d_i.is_Number
-            and d_j.is_Number
-            and l.is_Number):
-
-            diff_t = (t-tprime)
-            l2 = l*l
-            h = h(t, tprime, d_i, d_j, l)
-            half_l_di = 0.5*l*d_i
-            arg_1 = half_l_di + tprime/l
-            arg_2 = half_l_di - (t-tprime)/l
-            ln_part_1 = ln_diff_erf(arg_1, arg_2)
-            arg_1 = half_l_di
-            arg_2 = half_l_di - t/l
-            sign_val = sign(t/l)
-            ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
-
-            base = ((0.5*d_i*l2*(d_i+d_j)-1)*h
-                    + (-diff_t*sign_val*exp(half_l_di*half_l_di
-                                            -d_i*diff_t
-                                            +ln_part_1)
-                       +t*sign_val*exp(half_l_di*half_l_di
-                                       -d_i*t-d_j*tprime
-                                       +ln_part_2))
-                    + l/sqrt(pi)*(-exp(-diff_t*diff_t/l2)
-                                  +exp(-tprime*tprime/l2-d_i*t)
-                                  +exp(-t*t/l2-d_j*tprime)
-                                  -exp(-(d_i*t + d_j*tprime))))
-            return base/(d_i+d_j)
-
-class dh_dd_j(Function):
-    nargs = 5
-    @classmethod
-    def eval(cls, t, tprime, d_i, d_j, l):
-        if (t.is_Number
-            and tprime.is_Number
-            and d_i.is_Number
-            and d_j.is_Number
-            and l.is_Number):
-            diff_t = (t-tprime)
-            l2 = l*l
-            half_l_di = 0.5*l*d_i
-            h = h(t, tprime, d_i, d_j, l)
-            arg_1 = half_l_di + tprime/l
-            arg_2 = half_l_di - (t-tprime)/l
-            ln_part_1 = ln_diff_erf(arg_1, arg_2)
-            arg_1 = half_l_di
-            arg_2 = half_l_di - t/l
-            sign_val = sign(t/l)
-            ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
-            sign_val = sign(t/l)
-            base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-h
-            return base/(d_i+d_j)
-
-class dh_dl(Function):
-    nargs = 5
-    @classmethod
-    def eval(cls, t, tprime, d_i, d_j, l):
-        if (t.is_Number
-            and tprime.is_Number
-            and d_i.is_Number
-            and d_j.is_Number
-            and l.is_Number):
-
-            diff_t = (t-tprime)
-            l2 = l*l
-            h = h(t, tprime, d_i, d_j, l)
-            return 0.5*d_i*d_i*l*h + 2./(sqrt(pi)*(d_i+d_j))*((-diff_t/l2-d_i/2.)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2.)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2.)*exp(-t*t/l2-d_j*tprime)-d_i/2.*exp(-(d_i*t+d_j*tprime)))
-
-class dh_dt(Function):
-    nargs = 5
-    @classmethod
-    def eval(cls, t, tprime, d_i, d_j, l):
-        if (t.is_Number
-            and tprime.is_Number
-            and d_i.is_Number
-            and d_j.is_Number
-            and l.is_Number):
-            if (t is S.NaN
-                or tprime is S.NaN
-                or d_i is S.NaN
-                or d_j is S.NaN
-                or l is S.NaN):
-                return S.NaN
-            else:
-                half_l_di = 0.5*l*d_i
-                arg_1 = half_l_di + tprime/l
-                arg_2 = half_l_di - (t-tprime)/l
-                ln_part_1 = ln_diff_erf(arg_1, arg_2)
-                arg_1 = half_l_di
-                arg_2 = half_l_di - t/l
-                sign_val = sign(t/l)
-                ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
-
-                return (sign_val*exp(half_l_di*half_l_di
-                                     - d_i*(t-tprime)
-                                     + ln_part_1
-                                     - log(d_i + d_j))
-                        - sign_val*exp(half_l_di*half_l_di
-                                       - d_i*t - d_j*tprime
-                                       + ln_part_2
-                                       - log(d_i + d_j))).diff(t)
-
-class dh_dtprime(Function):
-    nargs = 5
-    @classmethod
-    def eval(cls, t, tprime, d_i, d_j, l):
-        if (t.is_Number
-            and tprime.is_Number
-            and d_i.is_Number
-            and d_j.is_Number
-            and l.is_Number):
-            if (t is S.NaN
-                or tprime is S.NaN
-                or d_i is S.NaN
-                or d_j is S.NaN
-                or l is S.NaN):
-                return S.NaN
-            else:
-                half_l_di = 0.5*l*d_i
-                arg_1 = half_l_di + tprime/l
-                arg_2 = half_l_di - (t-tprime)/l
-                ln_part_1 = ln_diff_erf(arg_1, arg_2)
-                arg_1 = half_l_di
-                arg_2 = half_l_di - t/l
-                sign_val = sign(t/l)
-                ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
-
-                return (sign_val*exp(half_l_di*half_l_di
-                                     - d_i*(t-tprime)
-                                     + ln_part_1
-                                     - log(d_i + d_j))
-                        - sign_val*exp(half_l_di*half_l_di
-                                       - d_i*t - d_j*tprime
-                                       + ln_part_2
-                                       - log(d_i + d_j))).diff(tprime)
-
-
-class h(Function):
-    nargs = 5
-    def fdiff(self, argindex=5):
-        t, tprime, d_i, d_j, l = self.args
-        if argindex == 1:
-            return dh_dt(t, tprime, d_i, d_j, l)
-        elif argindex == 2:
-            return dh_dtprime(t, tprime, d_i, d_j, l)
-        elif argindex == 3:
-            return dh_dd_i(t, tprime, d_i, d_j, l)
-        elif argindex == 4:
-            return dh_dd_j(t, tprime, d_i, d_j, l)
-        elif argindex == 5:
-            return dh_dl(t, tprime, d_i, d_j, l)
-
-    @classmethod
-    def eval(cls, t, tprime, d_i, d_j, l):
-        if (t.is_Number
-            and tprime.is_Number
-            and d_i.is_Number
-            and d_j.is_Number
-            and l.is_Number):
-            if (t is S.NaN
-                or tprime is S.NaN
-                or d_i is S.NaN
-                or d_j is S.NaN
-                or l is S.NaN):
-                return S.NaN
-            else:
-                half_l_di = 0.5*l*d_i
-                arg_1 = half_l_di + tprime/l
-                arg_2 = half_l_di - (t-tprime)/l
-                ln_part_1 = ln_diff_erf(arg_1, arg_2)
-                arg_1 = half_l_di
-                arg_2 = half_l_di - t/l
-                sign_val = sign(t/l)
-                ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
-
-                return (sign_val*exp(half_l_di*half_l_di
-                                     - d_i*(t-tprime)
-                                     + ln_part_1
-                                     - log(d_i + d_j))
-                        - sign_val*exp(half_l_di*half_l_di
-                                       - d_i*t - d_j*tprime
-                                       + ln_part_2
-                                       - log(d_i + d_j)))
-
-# return (exp((d_j/2.*l)**2)/(d_i+d_j)
-#         *(exp(-d_j*(tprime - t))
-#           *(erf((tprime-t)/l - d_j/2.*l)
-#             + erf(t/l + d_j/2.*l))
-#           - exp(-(d_j*tprime + d_i))
-#             *(erf(tprime/l - d_j/2.*l)
-#               + erf(d_j/2.*l))))