Matrix::multiplyAdd and transposeMultiplyAdd are "Level 2" BLAS-style operations and speed up the numeric part of the code substantially. Alex might be able to speed them up even more by making them call real BLAS routines within Matrix.cpp.
parent 3b6c4917a9
commit 5e4b23df59
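As a concrete follow-up to the "real BLAS" suggestion above: both new helpers map directly onto the Level-2 routine dgemv. The sketch below is illustrative only and is not part of this commit; it assumes the Matrix and Vector types expose contiguous row-major storage (as the boost::ublas dense types do) and that the build links against a CBLAS implementation providing cblas.h.

// Hypothetical BLAS-backed replacements for the hand-rolled loops in Matrix.cpp.
// Assumptions: contiguous row-major storage, CBLAS header and library available.
#include <cblas.h>

// e <- e + alpha*A*x
void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e) {
  cblas_dgemv(CblasRowMajor, CblasNoTrans, A.size1(), A.size2(),
      alpha, &A(0, 0), A.size2(),  // A and its leading dimension (= #columns)
      &x(0), 1,                    // x, unit stride
      1.0, &e(0), 1);              // beta = 1 accumulates into e
}

// x <- x + alpha*A'*e
void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x) {
  cblas_dgemv(CblasRowMajor, CblasTrans, A.size1(), A.size2(),
      alpha, &A(0, 0), A.size2(),
      &e(0), 1,
      1.0, &x(0), 1);
}

The call sites in the diff below would not need to change, since the signatures stay the same; only the loop bodies would be swapped for the dgemv calls.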
@@ -99,7 +99,7 @@ void backSubstituteInPlace(const GaussianBayesNet& bn, VectorConfig& y) {
       const Symbol& j = it->first;
       const Matrix& Rij = it->second;
       Vector& xj = x.getReference(j);
-      axpy(-1.0, Rij*xj, zi); // TODO: use BLAS level 2
+      multiplyAdd(-1.0,Rij,xj,zi);
     }
     Vector& xi = x.getReference(i);
     xi = gtsam::backSubstituteUpper(cg->get_R(), zi);

@@ -132,8 +132,7 @@ VectorConfig backSubstituteTranspose(const GaussianBayesNet& bn,
       const Symbol& i = it->first;
       const Matrix& Rij = it->second;
       Vector& gyi = gy.getReference(i); // should never fail
-      Matrix Lji = trans(Rij); // TODO avoid transpose of matrix ?
-      gyi -= Lji * gyj;
+      transposeMultiplyAdd(-1.0,Rij,gyj,gyi);
     }
   }

@@ -98,7 +98,7 @@ Vector GaussianConditional::solve(const VectorConfig& x) const {
   for (Parents::const_iterator it = parents_.begin(); it!= parents_.end(); it++) {
     const Symbol& j = it->first;
     const Matrix& Aj = it->second;
-    axpy(-1, Aj * x[j], rhs); // TODO use BLAS level 2
+    multiplyAdd(-1.0,Aj,x[j],rhs);
   }
   return backSubstituteUpper(R_, rhs, false);
 }

@@ -218,7 +218,7 @@ void GaussianFactor::transposeMultiplyAdd(double alpha, const Vector& e,
   FOREACH_PAIR(j, Aj, As_)
   {
     Vector& Xj = x.getReference(*j);
-    gtsam::transposeMultiplyAdd(*Aj, E, Xj);
+    gtsam::transposeMultiplyAdd(1.0, *Aj, E, Xj);
   }
 }

@@ -133,6 +133,18 @@ bool assert_equal(const Matrix& expected, const Matrix& actual, double tol) {
   return false;
 }
 
+/* ************************************************************************* */
+void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e) {
+  // ublas e += prod(A,x) is terribly slow
+  // TODO: use BLAS
+  for (int i = 0; i < A.size1(); i++) {
+    double& ei = e(i);
+    for (int j = 0; j < A.size2(); j++) {
+      ei += alpha * A(i, j) * x(j);
+    }
+  }
+}
+
 /* ************************************************************************* */
 Vector operator^(const Matrix& A, const Vector & v) {
   if (A.size1()!=v.size()) throw std::invalid_argument(

@@ -143,13 +155,13 @@ Vector operator^(const Matrix& A, const Vector & v) {
 }
 
 /* ************************************************************************* */
-void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x) {
-  // ublas Xj += prod(trans(Aj),Ei) is terribly slow
+void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x) {
+  // ublas x += prod(trans(A),e) is terribly slow
   // TODO: use BLAS
   for (int j = 0; j < A.size2(); j++) {
-    double& Xj1 = x(j);
+    double& xj = x(j);
     for (int i = 0; i < A.size1(); i++) {
-      Xj1 += A(i, j) * e(i);
+      xj += alpha * A(i, j) * e(i);
     }
   }
 }

@@ -86,6 +86,11 @@ bool assert_equal(const Matrix& A, const Matrix& B, double tol = 1e-9);
 */
 inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);}
 
+/**
+ * BLAS Level-2 style e <- e + alpha*A*x
+ */
+void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e);
+
 /**
  * overload ^ for trans(A)*v
  * We transpose the vectors for speed.

@@ -93,9 +98,9 @@ inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);}
 Vector operator^(const Matrix& A, const Vector & v);
 
 /**
- * BLAS Level-2 style x <- x + A'*e
+ * BLAS Level-2 style x <- x + alpha*A'*e
 */
-void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x);
+void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x);
 
 /**
 * overload * for vector*matrix multiplication (as BOOST does not)

@@ -63,7 +63,7 @@ namespace gtsam {
 
   /** x += alpha* A'*e */
   inline void transposeMultiplyAdd(double alpha, const Vector& e, Vector& x) const {
-    gtsam::transposeMultiplyAdd(A_,alpha*e,x);
+    gtsam::transposeMultiplyAdd(alpha,A_,e,x);
   }
 
   /**

@@ -772,6 +772,21 @@ TEST( matrix, square_root_positive )
   CHECK(assert_equal(cov, prod(trans(actual),actual)));
 }
 
+/* ************************************************************************* */
+TEST( matrix, multiplyAdd )
+{
+  Matrix A = Matrix_(3,4,
+      4., 0., 0., 1.,
+      0., 4., 0., 2.,
+      0., 0., 1., 3.
+  );
+  Vector x = Vector_(4, 1., 2., 3., 4.), e = Vector_(3, 5., 6., 7.),
+      expected = e + prod(A, x);
+
+  multiplyAdd(1,A,x,e);
+  CHECK(assert_equal(expected, e));
+}
+
 /* ************************************************************************* */
 TEST( matrix, transposeMultiplyAdd )
 {

@@ -783,7 +798,7 @@ TEST( matrix, transposeMultiplyAdd )
   Vector x = Vector_(4, 1., 2., 3., 4.), e = Vector_(3, 5., 6., 7.),
       expected = x + prod(trans(A), e);
 
-  transposeMultiplyAdd(A,e,x);
+  transposeMultiplyAdd(1,A,e,x);
   CHECK(assert_equal(expected, x));
 }