added a tdot function (thanks Iain)

2026-05-21 14:05:14 +02:00 · 2013-04-26 17:26:43 +01:00 · 2013-04-26 17:26:43 +01:00 · ef15de9411
commit ef15de9411
parent 43b720c848
2 changed files with 99 additions and 5 deletions
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@ -108,9 +108,6 @@ class sparse_GP(GP):
        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
        self.psi1V = np.dot(self.psi1, self.V)
        #tmp = np.dot(self.Lmi.T, self.LBi.T)
        #tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
        #self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
        self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
        self.Cpsi1V = np.dot(self.C,self.psi1V)
@ -171,7 +168,7 @@ class sparse_GP(GP):
            #likelihood is not heteroscedastic
            self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
-            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
            self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -1,9 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 #tdot function courtesy of Iain Murray:
 # Iain Murray, April 2013. iain contactable via iainmurray.net
 # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
 import numpy as np
-from scipy import linalg, optimize
+from scipy import linalg, optimize, weave
 import pylab as pb
 import Tango
 import sys
@ -11,9 +14,17 @@ import re
 import pdb
 import cPickle
 import types
 import ctypes
 from ctypes import byref, c_char, c_int, c_double # TODO
 #import scipy.lib.lapack.flapack
 import scipy as sp
 try:
    _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__)
    _blas_available = True
 except:
    _blas_available = False
 def trace_dot(a,b):
    """
    efficiently compute the trace of the matrix product of a and b
@ -175,3 +186,89 @@ def PCA(Y, Q):
    X /= v;
    W *= v;
    return X, W.T
 def tdot_numpy(mat,out=None):
    """
    Pure-numpy product of mat with its own transpose.

    out is forwarded to np.dot as its output array; when it is None,
    np.dot allocates the result itself.
    """
    transposed = mat.T
    return np.dot(mat, transposed, out)
 def tdot_blas(mat, out=None):
    """
    Return np.dot(mat, mat.T), but faster for large 2D arrays of doubles.

    The product is computed by calling the BLAS routine DSYRK through
    ctypes on the module-level _blaslib handle; symmetrify then mirrors
    the computed triangle onto the other half.

    mat: array to multiply by its own transpose. Anything that is not a
         2D float64 array is handed straight to np.dot, and out is
         ignored on that path.
    out: optional float64 array of shape (mat.shape[0], mat.shape[0])
         with one stride of 8 bytes. It is zeroed, overwritten with the
         result and returned. A fresh array is allocated when it is None.
    """
    if (mat.dtype != 'float64') or (len(mat.shape) != 2):
        return np.dot(mat, mat.T)
    nn = mat.shape[0]
    # Identity test, not truthiness: "not out" raises ValueError for any
    # array holding more than one element.
    if out is None:
        out = np.zeros((nn,nn))
    else:
        assert(out.dtype == 'float64')
        assert(out.shape == (nn,nn))
        # FIXME: should allow non-contiguous out, and copy output into it:
        assert(8 in out.strides)
        # zeroing needed because of dumb way I copy across triangular answer
        out[:] = 0.0

    ## Call to DSYRK from BLAS
    # If already in Fortran order (rare), and has the right sorts of strides I
    # could avoid the copy. I also thought swapping to cblas API would allow use
    # of C order. However, I tried that and had errors with large matrices:
    # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
    mat = mat.copy(order='F')
    # Byte literals are the same objects as plain strings on Python 2 and
    # are what c_char requires on Python 3.
    TRANS = c_char(b'n')
    N = c_int(mat.shape[0])
    K = c_int(mat.shape[1])
    LDA = c_int(mat.shape[0])
    UPLO = c_char(b'l')
    ALPHA = c_double(1.0)
    A = mat.ctypes.data_as(ctypes.c_void_p)
    BETA = c_double(0.0)
    C = out.ctypes.data_as(ctypes.c_void_p)
    # Leading dimension in elements (8 bytes per double). Integer division
    # keeps the value an int, which c_int requires.
    LDC = c_int(int(np.max(out.strides)) // 8)
    _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
            byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))

    # DSYRK was asked for the 'l' triangle in its column-major view of the
    # buffer, which is the lower triangle of out.T; copy it to the other half.
    symmetrify(out.T)

    return out
 def tdot(*args, **kwargs):
    """
    Compute mat * mat.T with the fastest implementation available.

    Dispatches to tdot_blas when the module-level BLAS probe succeeded
    and to tdot_numpy otherwise; all arguments are passed through as-is.
    """
    implementation = tdot_blas if _blas_available else tdot_numpy
    return implementation(*args, **kwargs)
 def symmetrify(A):
    """
    Make the square matrix A symmetric by copying its lower triangle onto
    its upper triangle.

    Works IN PLACE; nothing is returned.
    """
    N, n_cols = A.shape
    assert N==n_cols

    # weave kernel used when A is stored row-major
    lower_to_upper_c = """
    for (int i=1; i<N; i++){
      for (int j=0; j<i; j++){
        A[i+j*N] = A[i*N+j];
      }
    }
    """
    # weave kernel used when A is stored column-major
    lower_to_upper_f = """
    for (int i=1; i<N; i++){
      for (int j=0; j<i; j++){
        A[i*N+j] = A[i+j*N];
      }
    }
    """

    layout = A.flags
    if not (layout['C_CONTIGUOUS'] or layout['F_CONTIGUOUS']):
        # Neither layout is contiguous, so the flat-index kernels do not
        # apply: rebuild A from its lower triangle with plain numpy.
        lower = np.tril(A)
        A[:] = 0.0
        A += lower
        A += np.tril(lower,-1).T
        return

    # C order takes precedence when both flags are set.
    kernel = lower_to_upper_c if layout['C_CONTIGUOUS'] else lower_to_upper_f
    weave.inline(kernel,['A','N'])
 def symmetrify_murray(A):
    """
    Symmetrise the square matrix A IN PLACE by adding its transpose and
    then halving the diagonal.

    This gives a symmetric copy of one triangle only when the opposite
    strict triangle of A is zero on entry; otherwise the off-diagonal
    entries end up as A[i,j] + A[j,i]. Nothing is returned.
    """
    # Add a copy of the transpose: A and A.T share memory, and the copy keeps
    # the result independent of how numpy handles overlapping operands.
    A += A.T.copy()
    nn = A.shape[0]
    # Index with two separate integer arrays so that exactly the diagonal is
    # selected. A single list of two ranges is read by current numpy as one
    # 2D integer index that picks whole rows, which would halve every entry.
    diag = np.arange(nn)
    A[diag, diag] /= 2.0