Matrix::multiplyAdd and transposeMultiplyAdd are "level 2" BLAS and speed up the numeric part of the code substantially. Alex might be able to speed them up even more by making them use real BLAS code within Matrix.cpp.

2010-01-31 16:04:24 +00:00 · 2010-01-31 16:04:24 +00:00 · 5e4b23df59
parent 3b6c4917a9
commit 5e4b23df59
7 changed files with 44 additions and 13 deletions
--- a/cpp/GaussianBayesNet.cpp
+++ b/cpp/GaussianBayesNet.cpp
@ -99,7 +99,7 @@ void backSubstituteInPlace(const GaussianBayesNet& bn, VectorConfig& y) {
 			const Symbol& j = it->first;
 			const Matrix& Rij = it->second;
 			Vector& xj = x.getReference(j);
-			axpy(-1.0, Rij*xj, zi); // TODO: use BLAS level 2
+			multiplyAdd(-1.0,Rij,xj,zi);
 		}
 		Vector& xi = x.getReference(i);
 		xi = gtsam::backSubstituteUpper(cg->get_R(), zi);
@ -132,8 +132,7 @@ VectorConfig backSubstituteTranspose(const GaussianBayesNet& bn,
 			const Symbol& i = it->first;
 			const Matrix& Rij = it->second;
 			Vector& gyi = gy.getReference(i);  // should never fail
-			Matrix Lji = trans(Rij);  // TODO avoid transpose of matrix ?
-			gyi -= Lji * gyj;
+			transposeMultiplyAdd(-1.0,Rij,gyj,gyi);
 		}
 	}

--- a/cpp/GaussianConditional.cpp
+++ b/cpp/GaussianConditional.cpp
@ -98,7 +98,7 @@ Vector GaussianConditional::solve(const VectorConfig& x) const {
 	for (Parents::const_iterator it = parents_.begin(); it!= parents_.end(); it++) {
 		const Symbol& j = it->first;
 		const Matrix& Aj = it->second;
-		axpy(-1, Aj * x[j], rhs); // TODO use BLAS level 2
+		multiplyAdd(-1.0,Aj,x[j],rhs);
 	}
 	return backSubstituteUpper(R_, rhs, false);
 }
--- a/cpp/GaussianFactor.cpp
+++ b/cpp/GaussianFactor.cpp
@ -218,7 +218,7 @@ void GaussianFactor::transposeMultiplyAdd(double alpha, const Vector& e,
 	FOREACH_PAIR(j, Aj, As_)
 	{
 		Vector& Xj = x.getReference(*j);
-		gtsam::transposeMultiplyAdd(*Aj, E, Xj);
+		gtsam::transposeMultiplyAdd(1.0, *Aj, E, Xj);
 	}
 }

--- a/cpp/Matrix.cpp
+++ b/cpp/Matrix.cpp
@ -133,6 +133,18 @@ bool assert_equal(const Matrix& expected, const Matrix& actual, double tol) {
  return false;
 }

+/* ************************************************************************* */
+void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e) {
+	// ublas e += prod(A,x) is terribly slow
+	// TODO: use BLAS
+	for (int i = 0; i < A.size1(); i++) {
+			double& ei = e(i);
+			for (int j = 0; j < A.size2(); j++) {
+				ei += alpha * A(i, j) * x(j);
+			}
+		}
+}
+
 /* ************************************************************************* */
 Vector operator^(const Matrix& A, const Vector & v) {
  if (A.size1()!=v.size()) throw std::invalid_argument(
@ -143,13 +155,13 @@ Vector operator^(const Matrix& A, const Vector & v) {
 }

 /* ************************************************************************* */
-void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x) {
-	// ublas Xj += prod(trans(Aj),Ei) is terribly slow
+void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x) {
+	// ublas x += prod(trans(A),e) is terribly slow
 	// TODO: use BLAS
 	for (int j = 0; j < A.size2(); j++) {
-		double& Xj1 = x(j);
+		double& xj = x(j);
 		for (int i = 0; i < A.size1(); i++) {
-			Xj1 += A(i, j) * e(i);
+			xj += alpha * A(i, j) * e(i);
 		}
 	}
 }
--- a/cpp/Matrix.h
+++ b/cpp/Matrix.h
@ -86,6 +86,11 @@ bool assert_equal(const Matrix& A, const Matrix& B, double tol = 1e-9);
 */
 inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);}

+/**
+ * BLAS Level-2 style e <- e + alpha*A*x
+ */
+void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e);
+
 /**
 * overload ^ for trans(A)*v
 * We transpose the vectors for speed.
@ -93,9 +98,9 @@ inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);}
 Vector operator^(const Matrix& A, const Vector & v);

 /**
- * BLAS Level-2 style x <- x + A'*e
+ * BLAS Level-2 style x <- x + alpha*A'*e
 */
-void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x);
+void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x);

 /**
 * overload * for vector*matrix multiplication (as BOOST does not)
--- a/cpp/iterative.h
+++ b/cpp/iterative.h
@ -63,7 +63,7 @@ namespace gtsam {

 		/** x += alpha* A'*e */
 		inline void transposeMultiplyAdd(double alpha, const Vector& e, Vector& x) const {
-			gtsam::transposeMultiplyAdd(A_,alpha*e,x);
+			gtsam::transposeMultiplyAdd(alpha,A_,e,x);
 		}

 		/**
--- a/cpp/testMatrix.cpp
+++ b/cpp/testMatrix.cpp
@ -772,6 +772,21 @@ TEST( matrix, square_root_positive )
  CHECK(assert_equal(cov, prod(trans(actual),actual)));
 }

+/* ************************************************************************* */
+TEST( matrix, multiplyAdd )
+{
+  Matrix A = Matrix_(3,4,
+			4., 0., 0., 1.,
+			0., 4., 0., 2.,
+			0., 0., 1., 3.
+      );
+	Vector x = Vector_(4, 1., 2., 3., 4.), e = Vector_(3, 5., 6., 7.),
+			expected = e + prod(A, x);
+
+  multiplyAdd(1,A,x,e);
+  CHECK(assert_equal(expected, e));
+}
+
 /* ************************************************************************* */
 TEST( matrix, transposeMultiplyAdd )
 {
@ -783,7 +798,7 @@ TEST( matrix, transposeMultiplyAdd )
 	Vector x = Vector_(4, 1., 2., 3., 4.), e = Vector_(3, 5., 6., 7.),
 			expected = x + prod(trans(A), e);

-  transposeMultiplyAdd(A,e,x);
+  transposeMultiplyAdd(1,A,e,x);
  CHECK(assert_equal(expected, x));
 }