Reviewer-revised copy of this patch. Everything above the first "diff --git"
line is free text and is skipped by git apply / patch.

The copy under review had its line breaks and indentation collapsed, so the
hunks below were re-flowed by hand:
  * every hunk was recounted against its @@ header;
  * indentation and the position of blank context lines are reconstructed,
    not recovered. NOTE(review): confirm against the repository (or
    regenerate the patch) before relying on exact whitespace.

Changes relative to the reviewed patch:
  * removed the three "import ipdb; ipdb.set_trace()" debugger breakpoints
    from student_t_f_check and gaussian_f_check; the affected hunk headers
    were adjusted (+170,18 became +170,17 and +592,48 became +591,46);
  * gaussian._set_params: ln_K is used as the log-determinant term of the
    Gaussian log density, which for variance*I is N*log(variance); the
    reviewed patch stored np.trace(covariance_matrix), i.e. N*variance;
  * gaussian_f_check announced "final optimised student t"; it now names the
    gaussian model it actually prints;
  * the post-image blob ids on the "index" lines are those of the original
    commit and no longer describe the revised content.

diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py
index e8b6419f..02b38a79 100644
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@@ -170,28 +170,17 @@ def student_t_f_check():
     m.likelihood.X = X
     #print m
     plt.figure()
-    plt.subplot(511)
+    plt.subplot(211)
     m.plot()
-    #print m
-    plt.subplot(512)
-    m.optimize(max_f_eval=15)
-    m.plot()
-    #print m
-    plt.subplot(513)
-    m.optimize(max_f_eval=15)
-    m.plot()
-    #print m
-    plt.subplot(514)
-    m.optimize(max_f_eval=15)
-    m.plot()
-    #print m
-    plt.subplot(515)
+    print "OPTIMIZED ONCE"
+    plt.subplot(212)
     m.optimize()
     m.plot()
     print "final optimised student t"
     print m
     print "real GP"
     print mgp
+    return m
 
 def student_t_fix_optimise_check():
     plt.close('all')
@@ -602,3 +591,46 @@ def noisy_laplace_approx():
     print m
 
     #with a student t distribution, since it has heavy tails it should work well
+
+def gaussian_f_check():
+    plt.close('all')
+    X = np.linspace(0, 1, 50)[:, None]
+    real_std = 0.2
+    noise = np.random.randn(*X.shape)*real_std
+    Y = np.sin(X*2*np.pi) + noise
+
+    kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
+    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp.ensure_default_constraints()
+    mgp.randomize()
+    mgp.optimize()
+    print "Gaussian"
+    print mgp
+
+    kernelg = kernelgp.copy()
+    #kernelst += GPy.kern.bias(X.shape[1])
+    N, D = X.shape
+    g_distribution = GPy.likelihoods.likelihood_functions.gaussian(variance=0.1, N=N, D=D)
+    g_likelihood = GPy.likelihoods.Laplace(Y.copy(), g_distribution, opt='rasm')
+    m = GPy.models.GP(X, g_likelihood, kernelg)
+    #m['rbf_v'] = mgp._get_params()[0]
+    #m['rbf_l'] = mgp._get_params()[1] + 1
+    m.ensure_default_constraints()
+    #m.constrain_fixed('rbf_v', mgp._get_params()[0])
+    #m.constrain_fixed('rbf_l', mgp._get_params()[1])
+    #m.constrain_bounded('t_no', 2*real_std**2, 1e3)
+    #m.constrain_positive('bias')
+    m.constrain_positive('noise_var')
+    m.randomize()
+    m['noise_variance'] = 0.1
+    m.likelihood.X = X
+    plt.figure()
+    plt.subplot(211)
+    m.plot()
+    plt.subplot(212)
+    m.optimize()
+    m.plot()
+    print "final optimised gaussian"
+    print m
+    print "real GP"
+    print mgp
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py
index 42af9c8d..81d93f6b 100644
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@@ -9,7 +9,7 @@ from ..util.plot import gpplot
 from scipy.special import gammaln, gamma
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 
-class likelihood_function:
+class likelihood_function(object):
     """ Likelihood class for doing Expectation propagation
 
     :param Y: observed output (Nx1 numpy.darray)
@@ -159,7 +159,7 @@ class student_t(likelihood_function):
     d2ln p(yi|fi)_d2fifj
     """
     def __init__(self, deg_free, sigma2=2):
-        #super(student_t, self).__init__()
+        super(student_t, self).__init__()
         self.v = deg_free
         self.sigma2 = sigma2
         self.log_concave = False
@@ -468,9 +468,16 @@ class gaussian(likelihood_function):
     """
     Gaussian likelihood - this is a test class for approximation schemes
     """
-    def __init__(self, variance):
+    def __init__(self, variance, D, N):
+        super(gaussian, self).__init__()
+        self.D = D
+        self.N = N
         self._set_params(np.asarray(variance))
 
+        #Don't support normalizing yet
+        self._bias = np.zeros((1, self.D))
+        self._scale = np.ones((1, self.D))
+
     def _get_params(self):
         return np.asarray(self._variance)
 
@@ -481,7 +488,8 @@ class gaussian(likelihood_function):
         self._variance = float(x)
         self.I = np.eye(self.N)
         self.covariance_matrix = self.I * self._variance
-        self.Ki, _, _, self.ln_K = pdinv(self.covariance_matrix) # THIS MAY BE WRONG
+        self.Ki = self.I*(1.0 / self._variance)
+        self.ln_K = self.N*np.log(self._variance)
 
     def link_function(self, y, f, extra_data=None):
         """link_function $\ln p(y|f)$
@@ -498,7 +506,8 @@ class gaussian(likelihood_function):
         eeT = np.dot(e, e.T)
         objective = (- 0.5*self.D*np.log(2*np.pi)
                      - 0.5*self.ln_K
-                     - 0.5*np.sum(np.multiply(self.Ki, eeT))
+                     #- 0.5*np.sum(np.multiply(self.Ki, eeT))
+                     - 0.5*np.dot(np.dot(e.T, self.Ki), e)
                      )
         return np.sum(objective)
 
@@ -514,7 +523,7 @@ class gaussian(likelihood_function):
         """
         assert y.shape == f.shape
         s2_i = (1.0/self._variance)*self.I
-        grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f)
+        grad = np.dot(s2_i, y) - np.dot(s2_i, f)
         return grad
 
     def d2lik_d2f(self, y, f, extra_data=None):
@@ -532,7 +541,7 @@ class gaussian(likelihood_function):
         """
         assert y.shape == f.shape
         s2_i = (1.0/self._variance)*self.I
-        hess = np.diagonal(-0.5*s2_i)
+        hess = np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
         return hess
 
     def d3lik_d3f(self, y, f, extra_data=None):
@@ -542,7 +551,7 @@ class gaussian(likelihood_function):
         $$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
         """
         assert y.shape == f.shape
-        d3lik_d3f = np.diagonal(0*self.I)
+        d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
         return d3lik_d3f
 
     def lik_dstd(self, y, f, extra_data=None):
@@ -551,7 +560,7 @@ class gaussian(likelihood_function):
         """
         assert y.shape == f.shape
         e = y - f
-        dlik_dsigma = -0.5*self.N*self._variance - 0.5*np.dot(e.T, e)
+        dlik_dsigma = -0.5*self.D/self._variance - 0.5*np.trace(np.dot(e.T, np.dot(self.I, e)))
         return dlik_dsigma
 
     def dlik_df_dstd(self, y, f, extra_data=None):
@@ -560,7 +569,7 @@ class gaussian(likelihood_function):
         """
         assert y.shape == f.shape
         s_4 = 1.0/(self._variance**2)
-        dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f))
+        dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + np.dot(s_4, np.dot(self.I, f))
         return dlik_grad_dsigma
 
     def d2lik_d2f_dstd(self, y, f, extra_data=None):
@@ -570,7 +579,7 @@ class gaussian(likelihood_function):
         $$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
         """
         assert y.shape == f.shape
-        dlik_hess_dsigma = 1.0/(2*(self._variance**2))
+        dlik_hess_dsigma = np.diag(1.0/(self._variance**2)*self.I)[:, None]
         return dlik_hess_dsigma
 
     def _gradients(self, y, f, extra_data=None):
@@ -584,3 +593,10 @@ class gaussian(likelihood_function):
         assert len(derivs[1]) == len(self._get_param_names())
         assert len(derivs[2]) == len(self._get_param_names())
         return derivs
+
+    def predictive_values(self, mu, var):
+        mean = mu * self._scale + self._bias
+        true_var = (var + self._variance) * self._scale ** 2
+        _5pc = mean - 2.*np.sqrt(true_var)
+        _95pc = mean + 2.*np.sqrt(true_var)
+        return mean, true_var, _5pc, _95pc