Matrix::multiplyAdd and transposeMultiplyAdd are "level 2" BLAS and speed up the numeric part of the code substantially. Alex might be able to speed them up even more by making them use real BLAS code within Matrix.cpp.

release/4.3a0
Frank Dellaert 2010-01-31 16:04:24 +00:00
parent 3b6c4917a9
commit 5e4b23df59
7 changed files with 44 additions and 13 deletions

View File

@ -99,7 +99,7 @@ void backSubstituteInPlace(const GaussianBayesNet& bn, VectorConfig& y) {
const Symbol& j = it->first;
const Matrix& Rij = it->second;
Vector& xj = x.getReference(j);
axpy(-1.0, Rij*xj, zi); // TODO: use BLAS level 2
multiplyAdd(-1.0,Rij,xj,zi);
}
Vector& xi = x.getReference(i);
xi = gtsam::backSubstituteUpper(cg->get_R(), zi);
@ -132,8 +132,7 @@ VectorConfig backSubstituteTranspose(const GaussianBayesNet& bn,
const Symbol& i = it->first;
const Matrix& Rij = it->second;
Vector& gyi = gy.getReference(i); // should never fail
Matrix Lji = trans(Rij); // TODO avoid transpose of matrix ?
gyi -= Lji * gyj;
transposeMultiplyAdd(-1.0,Rij,gyj,gyi);
}
}

View File

@ -98,7 +98,7 @@ Vector GaussianConditional::solve(const VectorConfig& x) const {
for (Parents::const_iterator it = parents_.begin(); it!= parents_.end(); it++) {
const Symbol& j = it->first;
const Matrix& Aj = it->second;
axpy(-1, Aj * x[j], rhs); // TODO use BLAS level 2
multiplyAdd(-1.0,Aj,x[j],rhs);
}
return backSubstituteUpper(R_, rhs, false);
}

View File

@ -218,7 +218,7 @@ void GaussianFactor::transposeMultiplyAdd(double alpha, const Vector& e,
FOREACH_PAIR(j, Aj, As_)
{
Vector& Xj = x.getReference(*j);
gtsam::transposeMultiplyAdd(*Aj, E, Xj);
gtsam::transposeMultiplyAdd(1.0, *Aj, E, Xj);
}
}

View File

@ -133,6 +133,18 @@ bool assert_equal(const Matrix& expected, const Matrix& actual, double tol) {
return false;
}
/* ************************************************************************* */
void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e) {
// ublas e += prod(A,x) is terribly slow
// TODO: use BLAS
for (int i = 0; i < A.size1(); i++) {
double& ei = e(i);
for (int j = 0; j < A.size2(); j++) {
ei += alpha * A(i, j) * x(j);
}
}
}
/* ************************************************************************* */
Vector operator^(const Matrix& A, const Vector & v) {
if (A.size1()!=v.size()) throw std::invalid_argument(
@ -143,13 +155,13 @@ Vector operator^(const Matrix& A, const Vector & v) {
}
/* ************************************************************************* */
void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x) {
// ublas Xj += prod(trans(Aj),Ei) is terribly slow
void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x) {
// ublas x += prod(trans(A),e) is terribly slow
// TODO: use BLAS
for (int j = 0; j < A.size2(); j++) {
double& Xj1 = x(j);
double& xj = x(j);
for (int i = 0; i < A.size1(); i++) {
Xj1 += A(i, j) * e(i);
xj += alpha * A(i, j) * e(i);
}
}
}

View File

@ -86,6 +86,11 @@ bool assert_equal(const Matrix& A, const Matrix& B, double tol = 1e-9);
*/
inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);}
/**
* BLAS Level-2 style e <- e + alpha*A*x
*/
void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e);
/**
* overload ^ for trans(A)*v
* We transpose the vectors for speed.
@ -93,9 +98,9 @@ inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);}
Vector operator^(const Matrix& A, const Vector & v);
/**
* BLAS Level-2 style x <- x + A'*e
* BLAS Level-2 style x <- x + alpha*A'*e
*/
void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x);
void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x);
/**
* overload * for vector*matrix multiplication (as BOOST does not)

View File

@ -63,7 +63,7 @@ namespace gtsam {
/** x += alpha* A'*e */
inline void transposeMultiplyAdd(double alpha, const Vector& e, Vector& x) const {
gtsam::transposeMultiplyAdd(A_,alpha*e,x);
gtsam::transposeMultiplyAdd(alpha,A_,e,x);
}
/**

View File

@ -772,6 +772,21 @@ TEST( matrix, square_root_positive )
CHECK(assert_equal(cov, prod(trans(actual),actual)));
}
/* ************************************************************************* */
TEST( matrix, multiplyAdd )
{
Matrix A = Matrix_(3,4,
4., 0., 0., 1.,
0., 4., 0., 2.,
0., 0., 1., 3.
);
Vector x = Vector_(4, 1., 2., 3., 4.), e = Vector_(3, 5., 6., 7.),
expected = e + prod(A, x);
multiplyAdd(1,A,x,e);
CHECK(assert_equal(expected, e));
}
/* ************************************************************************* */
TEST( matrix, transposeMultiplyAdd )
{
@ -783,7 +798,7 @@ TEST( matrix, transposeMultiplyAdd )
Vector x = Vector_(4, 1., 2., 3., 4.), e = Vector_(3, 5., 6., 7.),
expected = x + prod(trans(A), e);
transposeMultiplyAdd(A,e,x);
transposeMultiplyAdd(1,A,e,x);
CHECK(assert_equal(expected, x));
}