added a tdot function (thanks Iain)

This commit is contained in:
James Hensman 2013-04-26 17:26:43 +01:00
parent 43b720c848
commit ef15de9411
2 changed files with 99 additions and 5 deletions

View file

@ -108,9 +108,6 @@ class sparse_GP(GP):
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
#tmp = np.dot(self.Lmi.T, self.LBi.T)
#tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
#self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
self.Cpsi1V = np.dot(self.C,self.psi1V)
@ -171,7 +168,7 @@ class sparse_GP(GP):
#likelihood is not heterscedatic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))

View file

@ -1,9 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#tdot function courtesy of Ian Murray:
# Iain Murray, April 2013. iain contactable via iainmurray.net
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
import numpy as np
from scipy import linalg, optimize
from scipy import linalg, optimize, weave
import pylab as pb
import Tango
import sys
@ -11,9 +14,17 @@ import re
import pdb
import cPickle
import types
import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO
#import scipy.lib.lapack.flapack
import scipy as sp
try:
_blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__)
_blas_available = True
except:
_blas_available = False
def trace_dot(a,b):
"""
efficiently compute the trace of the matrix product of a and b
@ -175,3 +186,89 @@ def PCA(Y, Q):
X /= v;
W *= v;
return X, W.T
def tdot_numpy(mat,out=None):
return np.dot(mat,mat.T,out)
def tdot_blas(mat, out=None):
"""returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles."""
if (mat.dtype != 'float64') or (len(mat.shape) != 2):
return np.dot(mat, mat.T)
nn = mat.shape[0]
if not out:
out = np.zeros((nn,nn))
else:
assert(out.dtype == 'float64')
assert(out.shape == (nn,nn))
# FIXME: should allow non-contiguous out, and copy output into it:
assert(8 in out.strides)
# zeroing needed because of dumb way I copy across triangular answer
out[:] = 0.0
## Call to DSYRK from BLAS
# If already in Fortran order (rare), and has the right sorts of strides I
# could avoid the copy. I also thought swapping to cblas API would allow use
# of C order. However, I tried that and had errors with large matrices:
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
mat = mat.copy(order='F')
TRANS = c_char('n')
N = c_int(mat.shape[0])
K = c_int(mat.shape[1])
LDA = c_int(mat.shape[0])
UPLO = c_char('l')
ALPHA = c_double(1.0)
A = mat.ctypes.data_as(ctypes.c_void_p)
BETA = c_double(0.0)
C = out.ctypes.data_as(ctypes.c_void_p)
LDC = c_int(np.max(out.strides) / 8)
_blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
symmetrify(out.T)
return out
def tdot(*args, **kwargs):
if _blas_available:
return tdot_blas(*args,**kwargs)
else:
return tdot_numpy(*args,**kwargs)
def symmetrify(A):
"""
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
works IN PLACE.
"""
N,M = A.shape
assert N==M
c_contig_code = """
for (int i=1; i<N; i++){
for (int j=0; j<i; j++){
A[i+j*N] = A[i*N+j];
}
}
"""
f_contig_code = """
for (int i=1; i<N; i++){
for (int j=0; j<i; j++){
A[i*N+j] = A[i+j*N];
}
}
"""
if A.flags['C_CONTIGUOUS']:
weave.inline(c_contig_code,['A','N'])
elif A.flags['F_CONTIGUOUS']:
weave.inline(f_contig_code,['A','N'])
else:
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp,-1).T
def symmetrify_murray(A):
A += A.T
nn = A.shape[0]
A[[range(nn),range(nn)]] /= 2.0