import GPy
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t, norm
from coxGP.python.likelihoods.Laplace import Laplace
from coxGP.python.likelihoods.likelihood_function import student_t


def _fit_gaussian_panel(label, X, Y, kernel, subplot_code, X_full, Y_full):
    """Fit a Gaussian-likelihood GP regression and draw it into one subplot.

    :param label: text printed before fitting, identifying the experiment
    :param X: training inputs
    :param Y: training targets
    :param kernel: GPy kernel object handed to the model
    :param subplot_code: matplotlib subplot code (e.g. ``211``)
    :param X_full: dense inputs for the noise-free reference curve
    :param Y_full: noise-free reference values at ``X_full``
    :returns: the optimised GPy model
    """
    print(label)
    model = GPy.models.GP_regression(X, Y, kernel=kernel)
    model.ensure_default_constraints()
    model.optimize()
    plt.subplot(subplot_code)
    model.plot()
    plt.plot(X_full, Y_full)
    print(model)
    return model


def _fit_student_t_panel(label, X, Y, kernel, deg_free, sigma, rasm,
                         subplot_code, X_full, Y_full):
    """Fit a GP with a Laplace-approximated student-t likelihood and plot it.

    :param label: text printed before fitting, identifying the experiment
    :param X: training inputs
    :param Y: training targets (handed straight to ``Laplace``)
    :param kernel: GPy kernel object handed to the model
    :param deg_free: degrees of freedom passed to ``student_t``
    :param sigma: value passed as ``sigma`` to ``student_t``
    :param rasm: flag forwarded to ``Laplace(..., rasm=...)``
        # NOTE(review): presumably switches between two mode-finding
        # schemes ("ncg" vs "rasm" in the printed labels) -- confirm
        # against the Laplace class.
    :param subplot_code: matplotlib subplot code (e.g. ``221``)
    :param X_full: dense inputs for the noise-free reference curve
    :param Y_full: noise-free reference values at ``X_full``
    :returns: the optimised GPy model
    """
    print(label)
    t_distribution = student_t(deg_free, sigma=sigma)
    likelihood = Laplace(Y, t_distribution, rasm=rasm)
    model = GPy.models.GP(X, likelihood, kernel)
    model.ensure_default_constraints()
    model.update_likelihood_approximation()
    model.optimize()
    print(model)
    plt.subplot(subplot_code)
    model.plot()
    plt.plot(X_full, Y_full)
    # Fixed y-range so the outliers do not squash the interesting region.
    plt.ylim(-2.5, 2.5)
    return model


def student_t_approx():
    """
    Example of regressing with a student t likelihood.

    Samples a noisy sine curve, builds a second copy of the targets with four
    large outliers added, and fits six models:

    * figure 1 -- Gaussian likelihood on the clean and the corrupted targets;
    * figure 2 -- Laplace-approximated student-t likelihood on the clean and
      the corrupted targets, once with ``rasm=False`` and once with
      ``rasm=True``.

    Each fit is printed and drawn together with the noise-free sine curve.
    The function does not call ``plt.show()``; the caller decides when to
    display the figures.

    :returns: the last model fitted (corrupted targets, ``rasm=True``)
    """
    real_var = 0.1
    deg_free = 10
    # NOTE(review): the noise below is scaled by ``real_var`` (i.e. it is used
    # as a standard deviation), whereas ``real_sd = sqrt(real_var)`` is what
    # is passed as ``sigma`` to the student-t likelihood. Confirm which of the
    # two is intended.
    real_sd = np.sqrt(real_var)

    # Training data: 30 noisy samples of sin(x) on [0, 10].
    X = np.linspace(0.0, 10.0, 30)[:, None]
    Y = np.sin(X) + np.random.randn(*X.shape)*real_var

    # Corrupted copy of the targets: one huge and three large outliers.
    Yc = Y.copy()
    Yc[10] += 100
    Yc[25] += 10
    Yc[23] += 10
    Yc[24] += 10

    # Dense, noise-free curve used as the reference line in every subplot.
    X_full = np.linspace(0.0, 10.0, 500)[:, None]
    Y_full = np.sin(X_full)

    # One independent kernel per model. All copies are taken up front, before
    # any optimisation runs, so every fit starts from the same initial kernel.
    base_kernel = GPy.kern.rbf(X.shape[1])
    (gauss_clean_k, gauss_corrupt_k,
     t_clean_ncg_k, t_corrupt_ncg_k,
     t_clean_rasm_k, t_corrupt_rasm_k) = (
        [base_kernel] + [base_kernel.copy() for _ in range(5)])

    plt.figure(1)
    plt.suptitle('Gaussian likelihood')
    _fit_gaussian_panel("Clean Gaussian", X, Y, gauss_clean_k,
                        211, X_full, Y_full)
    # A Gaussian-likelihood GP is expected to break down on the corrupted
    # targets, since the outliers get a lot of weight.
    _fit_gaussian_panel("Corrupt Gaussian", X, Yc, gauss_corrupt_k,
                        212, X_full, Y_full)

    plt.figure(2)
    plt.suptitle('Student-t likelihood')
    # (label, targets, kernel, rasm flag, subplot code), in fitting order.
    # Every fit gets its own copy of the targets so that no likelihood object
    # shares an array with another fit.
    student_t_runs = [
        ("Clean student t, ncg", Y.copy(), t_clean_ncg_k, False, 221),
        ("Corrupt student t, ncg", Yc.copy(), t_corrupt_ncg_k, False, 223),
        ("Clean student t, rasm", Y.copy(), t_clean_rasm_k, True, 222),
        ("Corrupt student t, rasm", Yc.copy(), t_corrupt_rasm_k, True, 224),
    ]
    m = None
    for label, targets, kernel, rasm, subplot_code in student_t_runs:
        m = _fit_student_t_panel(label, X, targets, kernel, deg_free,
                                 real_sd, rasm, subplot_code, X_full, Y_full)

    return m


def noisy_laplace_approx():
    """
    Example of regressing with a student t likelihood
    """
    #Start a function, any function
    X = np.sort(np.random.uniform(0, 15, 70))[:, None]
    Y = np.sin(X)

    #Add some extreme value noise to some of the datapoints
    percent_corrupted = 0.05
    corrupted_datums = int(np.round(Y.shape[0] * percent_corrupted))
    indices = np.arange(Y.shape[0])
    np.random.shuffle(indices)
    corrupted_indices = indices[:corrupted_datums]
    print corrupted_indices
    noise = np.random.uniform(-10, 10, (len(corrupted_indices), 1))
    Y[corrupted_indices] += noise

    #A GP should completely break down due to the points as they get a lot of weight
    # create simple GP model
    m = GPy.models.GP_regression(X, Y)

    # optimize
    m.ensure_default_constraints()
    m.optimize()
    # plot
    m.plot()
    print m

    #with a student t distribution, since it has heavy tails it should work well