[linalg] ppca with missing data removed

2026-05-30 14:35:15 +02:00 · 2014-11-11 09:56:36 +00:00 · 2014-11-11 09:56:36 +00:00 · 1e6ed9f873
commit 1e6ed9f873
parent ca0523c6ee
1 changed files with 0 additions and 99 deletions
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -328,105 +328,6 @@ def ppca(Y, Q, iterations=100):
        pass
    return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W)

-def ppca_missing_data_at_random(Y, Q, iters=100):
-    """
-    EM implementation of Probabilistic pca for when there is missing data.
-
-    Taken from <SheffieldML, https://github.com/SheffieldML>
-
-    .. math:
-        \\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where}
-        \\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I})
-
-    :returns: X, W, sigma^2
-    """
-    from numpy.ma import dot as madot
-    import diag
-    from GPy.util.subarray_and_sorting import common_subarrays
-    import time
-    debug = 1
-    # Initialise W randomly
-    N, D = Y.shape
-    W = np.random.randn(Q, D) * 1e-3
-    Y = np.ma.masked_invalid(Y, copy=1)
-    nu = 1.
-    #num_obs_i = 1./Y.count()
-    Ycentered = Y - Y.mean(0)
-
-    X = np.zeros((N,Q))
-    cs = common_subarrays(Y.mask)
-    cr = common_subarrays(Y.mask, 1)
-    Sigma = np.zeros((N, Q, Q))
-    Sigma2 = np.zeros((N, Q, Q))
-    mu = np.zeros(D)
-    """
-    if debug:
-        import matplotlib.pyplot as pylab
-        fig = pylab.figure("FIT MISSING DATA");
-        ax = fig.gca()
-        ax.cla()
-        lines = pylab.plot(np.zeros((N,Q)).dot(W))
-    """
-    W2 = np.zeros((Q,D))
-
-    for i in range(iters):
-#         Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu))
-#         exp_x = madot(madot(Ycentered, W.T),Sigma)/nu
-#         Ycentered = (Y - exp_x.dot(W).mean(0))
-#         #import ipdb;ipdb.set_trace()
-#         #Ycentered = mu
-#         W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered))
-#         nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N
-        for csi, (mask, index) in enumerate(cs.iteritems()):
-            mask = ~np.array(mask)
-            Sigma2[index, :, :] = nu * np.linalg.inv(diag.add(W2[:,mask].dot(W2[:,mask].T), nu))
-            #X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0]
-        X2 = ((Sigma2/nu) * (madot(Ycentered,W2.T).base)[:,:,None]).sum(-1)
-        mu2 = (Y - X.dot(W)).mean(0)
-        for n in range(N):
-            Sigma[n] = nu * np.linalg.inv(diag.add(W[:,~Y.mask[n]].dot(W[:,~Y.mask[n]].T), nu))
-            X[n, :] = (Sigma[n]/nu).dot(W[:,~Y.mask[n]].dot(Ycentered[n,~Y.mask[n]].T))
-        for d in range(D):
-            mu[d] = (Y[~Y.mask[:,d], d] - X[~Y.mask[:,d]].dot(W[:, d])).mean()
-        Ycentered = (Y - mu)
-        nu3 = 0.
-        for cri, (mask, index) in enumerate(cr.iteritems()):
-            mask = ~np.array(mask)
-            W2[:,index] = np.linalg.solve(X[mask].T.dot(X[mask]) + Sigma[mask].sum(0), madot(X[mask].T, Ycentered[mask,index]))[:,None]
-            W2[:,index] = np.linalg.solve(X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:,index]))
-            #nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum()
-            nu3 += (((Ycentered[index] - X.dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma.sum(0).dot(W[:,index])).sum(0)).sum()
-        nu3 /= N
-        nu = 0.
-        nu2 = 0.
-        W = np.zeros((Q,D))
-        for j in range(D):
-            W[:,j] = np.linalg.solve(X[~Y.mask[:,j]].T.dot(X[~Y.mask[:,j]]) + Sigma[~Y.mask[:,j]].sum(0), madot(X[~Y.mask[:,j]].T, Ycentered[~Y.mask[:,j],j]))
-            nu2f = np.tensordot(W[:,j].T, Sigma[~Y.mask[:,j],:,:], [0,1]).dot(W[:,j])
-            nu2s = W[:,j].T.dot(Sigma[~Y.mask[:,j],:,:].sum(0).dot(W[:,j]))
-            nu2 += (((Ycentered[~Y.mask[:,j],j] - X[~Y.mask[:,j],:].dot(W[:,j]))**2) + nu2f).sum()
-            for i in range(N):
-                if not Y.mask[i,j]:
-                    nu += ((Ycentered[i,j] - X[i,:].dot(W[:,j]))**2) + W[:,j].T.dot(Sigma[i,:,:].dot(W[:,j]))
-        nu /= N
-        nu2 /= N
-        nu4 = (((Ycentered - X.dot(W))**2).sum(0) + W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum()/N
-        import ipdb;ipdb.set_trace()
-        """
-        if debug:
-            #print Sigma[0]
-            print "nu:", nu, "sum(X):", X.sum()
-            pred_y = X.dot(W)
-            for x, l in zip(pred_y.T, lines):
-                l.set_ydata(x)
-            ax.autoscale_view()
-            ax.set_ylim(pred_y.min(), pred_y.max())
-            fig.canvas.draw()
-            time.sleep(.3)
-        """
-    return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu
-
-
 def tdot_numpy(mat, out=None):
    return np.dot(mat, mat.T, out)