diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/examples/__init__.py b/python/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/examples/laplace_approximations.py b/python/examples/laplace_approximations.py index 2f059831..0e1d3305 100644 --- a/python/examples/laplace_approximations.py +++ b/python/examples/laplace_approximations.py @@ -1,8 +1,9 @@ import GPy import numpy as np -import scipy as sp -import scipy.stats import matplotlib.pyplot as plt +from scipy.stats import t +from coxGP.python.likelihoods.Laplace import Laplace +from coxGP.python.likelihoods.likelihood_function import student_t def student_t_approx(): @@ -13,6 +14,41 @@ def student_t_approx(): X = np.sort(np.random.uniform(0, 15, 70))[:, None] Y = np.sin(X) + #Add student t random noise to datapoints + deg_free = 1 + noise = t.rvs(deg_free, loc=1.8, scale=1, size=Y.shape) + Y += noise + + # Kernel object + print X.shape + kernel = GPy.kern.rbf(X.shape[1]) + + #A GP should completely break down due to the points as they get a lot of weight + # create simple GP model + m = GPy.models.GP_regression(X, Y, kernel=kernel) + + # optimize + m.ensure_default_constraints() + m.optimize() + # plot + #m.plot() + print m + + #with a student t distribution, since it has heavy tails it should work well + likelihood_function = student_t(deg_free, sigma=1) + lap = Laplace(Y, likelihood_function) + cov = kernel.K(X) + lap.fit_full(cov) + + +def noisy_laplace_approx(): + """ + Example of regressing with a student t likelihood + """ + #Start a function, any function + X = np.sort(np.random.uniform(0, 15, 70))[:, None] + Y = np.sin(X) + #Add some extreme value noise to some of the datapoints percent_corrupted = 0.05 corrupted_datums = int(np.round(Y.shape[0] * percent_corrupted)) @@ -20,12 +56,12 @@ def student_t_approx(): 
np.random.shuffle(indices) corrupted_indices = indices[:corrupted_datums] print corrupted_indices - noise = np.random.uniform(-10,10,(len(corrupted_indices), 1)) + noise = np.random.uniform(-10, 10, (len(corrupted_indices), 1)) Y[corrupted_indices] += noise #A GP should completely break down due to the points as they get a lot of weight # create simple GP model - m = GPy.models.GP_regression(X,Y) + m = GPy.models.GP_regression(X, Y) # optimize m.ensure_default_constraints() diff --git a/python/likelihoods/Laplace.py b/python/likelihoods/Laplace.py index a0dbc65c..6efbfa30 100644 --- a/python/likelihoods/Laplace.py +++ b/python/likelihoods/Laplace.py @@ -1,8 +1,14 @@ -import nump as np +import numpy as np +import scipy as sp, scipy.optimize import GPy from GPy.util.linalg import jitchol +from functools import partial +from GPy.likelihoods.likelihood import likelihood +from GPy.util.linalg import pdinv,mdot -class Laplace(GPy.likelihoods.likelihood): + + +class Laplace(likelihood): """Laplace approximation to a posterior""" def __init__(self,data,likelihood_function): @@ -23,8 +29,6 @@ class Laplace(GPy.likelihoods.likelihood): :likelihood_function: @todo """ - GPy.likelihoods.likelihood.__init__(self) - self.data = data self.likelihood_function = likelihood_function @@ -38,7 +42,7 @@ class Laplace(GPy.likelihoods.likelihood): GPy expects a likelihood to be gaussian, so need to caluclate the points Y^{squiggle} and Z^{squiggle} that makes the posterior match that found by a laplace approximation to a non-gaussian likelihood """ - raise NotImplementedError + raise NotImplementedError #TODO: z_hat = N(f_hat|f_hat, hess_hat) / self.height_unnormalised def fit_full(self, K): """ @@ -46,9 +50,38 @@ class Laplace(GPy.likelihoods.likelihood): For nomenclature see Rasmussen & Williams 2006 :K: Covariance matrix """ - self.f = np.zeros(self.N) + f = np.zeros((self.N, 1)) + print K.shape + print f.shape + print self.data.shape + (Ki, _, _, log_Kdet) = pdinv(K) + obj_constant = (0.5 * log_Kdet) - ((0.5 * self.N) * 
np.log(2*np.pi)) #Find \hat(f) using a newton raphson optimizer for example + #TODO: Add newton-raphson as subclass of optimizer class + + #FIXME: Can we get rid of this horrible reshaping? + def obj(f): + f = f[:, None] + res = -1 * (self.likelihood_function.link_function(self.data, f) - 0.5 * mdot(f.T, (Ki, f)) + obj_constant) + return float(res) + + def obj_grad(f): + f = f[:, None] + res = -1 * (self.likelihood_function.link_grad(self.data, f) - mdot(Ki, f)) + return np.squeeze(res) + + def obj_hess(f): + f = f[:, None] + res = -1 * (np.diagflat(self.likelihood_function.link_hess(self.data, f)) - Ki) + return np.squeeze(res) + + self.f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess) #At this point get the hessian matrix + self.hess_hat = obj_hess(self.f_hat) + #Need to add the constant as we previously were trying to avoid computing it (seems like a small overhead though...) + self.height_unnormalised = obj(self.f_hat) #FIXME: Is it -1? + + return self._compute_GP_variables() diff --git a/python/likelihoods/__init__.py b/python/likelihoods/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/likelihoods/likelihood_function.py b/python/likelihoods/likelihood_function.py index 5d4e51ce..78731199 100644 --- a/python/likelihoods/likelihood_function.py +++ b/python/likelihoods/likelihood_function.py @@ -1,62 +1,72 @@ -import GPy -from scipy.special import gamma, gammaln +from scipy.special import gammaln +import numpy as np +from GPy.likelihoods.likelihood_functions import likelihood_function -class student_t(GPy.likelihoods.likelihood_function): + +class student_t(likelihood_function): """Student t likelihood distribution For nomanclature see Bayesian Data Analysis 2003 p576 - $$\ln(\frac{\Gamma(\frac{(v+1)}{2})}{\Gamma(\sqrt(v \pi \Gamma(\frac{v}{2}))})+ \ln(1+\frac{(y_i-f_i)^2}{\sigma v})^{-\frac{(v+1)}{2}}$$ - TODO:Double check this + $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln\left(\Gamma(\frac{v}{2})\sqrt{v \pi}\sigma\right) - 
\frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$ Laplace: Needs functions to calculate ln p(yi|fi) dln p(yi|fi)_dfi - d2ln p(yi|fi)_d2fi + d2ln p(yi|fi)_d2fifj """ def __init__(self, deg_free, sigma=1): self.v = deg_free self.sigma = 1 - def link_function(self, y_i, f_i): - """link_function $\ln p(y_i|f_i)$ - $$\ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2}) - \ln \frac{v \pi \sigma}{2} - \frac{v+1}{2}\ln (1 + \frac{(y_{i} - f_{i})^{2}}{v\sigma})$$ - TODO: Double check this + def link_function(self, y, f): + """link_function $\ln p(y|f)$ + $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$ - :y_i: datum number i - :f_i: latent variable f_i + :y: datum number i + :f: latent variable f :returns: float(likelihood evaluated for this point) """ - e = y_i - f_i - return gammaln((v+1)*0.5) - gammaln(v*0.5) - np.ln(v*np.pi*sigma)*0.5 - (v+1)*0.5*np.ln(1 + ((e/sigma)**2)/v) #Check the /v! 
+ e = y - f + #print "Link ", y.shape, f.shape, e.shape + objective = (gammaln((self.v + 1) * 0.5) + - gammaln(self.v * 0.5) + - np.log(self.sigma * np.sqrt(self.v * np.pi)) + - (self.v + 1) * 0.5 + * np.log(1 + ((e**2 / self.sigma**2) / self.v)) + ) + return np.sum(objective) - def link_grad(self, y_i, f_i): - """gradient of the link function at y_i, given f_i w.r.t f_i + def link_grad(self, y, f): + """ + Gradient of the link function at y, given f w.r.t f - derivative of log((gamma((v+1)/2)/gamma(sqrt(v*pi*gamma(v/2))))*(1+(t^2)/(a*v))^((-(v+1))/2)) with respect to t - $$\frac{(y_i - f_i)(v + 1)}{\sigma v (y_{i} - f_{i})^{2}}$$ - TODO: Double check this + $$\frac{d}{df}\ln p(y_{i}|f_{i}) = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$ - :y_i: datum number i - :f_i: latent variable f_i + :y: datum number i + :f: latent variable f :returns: float(gradient of likelihood evaluated at this point) """ - pass - - def link_hess(self, y_i, f_i, f_j): - """hessian at this point (the hessian will be 0 unless i == j) - i.e. second derivative w.r.t f_i and f_j - - second derivative of - - :y_i: @todo - :f_i: @todo - :f_j: @todo - :returns: @todo + e = y - f + #print "Grad ", y.shape, f.shape, e.shape + grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2)) + return grad + def link_hess(self, y, f): """ - if f_i = - pass + Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j + i.e. 
second derivative of link_function at y given f w.r.t f + Will return diagonal of hessian, since everywhere else it is 0 + + $$\frac{d^{2} \ln p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)\left((y_{i} - f_{i})^{2} - v \sigma^{2}\right)}{(v \sigma^{2} + (y_{i} - f_{i})^{2})^{2}}$$ + + :y: datum number i + :f: latent variable f + :returns: float(second derivative of likelihood evaluated at this point) + """ + e = y - f + hess = ((self.v + 1) * (e**2 - self.v * (self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2) + return hess diff --git a/python/models/__init__.py b/python/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/testing/__init__.py b/python/testing/__init__.py new file mode 100644 index 00000000..e69de29b