Little performance twiddles that make little difference

2010-01-16 07:22:34 +00:00 · 2010-01-16 07:22:34 +00:00 · a4d61d2f23
parent a8afe0da7c
commit a4d61d2f23
2 changed files with 10 additions and 16 deletions
--- a/cpp/Matrix.cpp
+++ b/cpp/Matrix.cpp
@ -299,20 +299,17 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) {
 /* ************************************************************************* */
 // update A, b
 // A' \define A_{S}-ar and b'\define b-ad
-__attribute__ ((noinline))	// uncomment to prevent inlining when profiling
+// __attribute__ ((noinline))	// uncomment to prevent inlining when profiling
 void updateAb(Matrix& A, Vector& b, int j, const Vector& a, const Vector& r, double d) {
 	const size_t m = A.size1(), n = A.size2();
-	for (int i = 0; i < m; ++i) { // update all rows
+	for (int i = 0; i < m; i++) { // update all rows
 		double ai = a(i);
 		b(i) -= ai * d;
-		double *Aptr = A.data().begin() + i * n + j + 1;
+		double *Aij = A.data().begin() + i * n + j + 1;
 		const double *rptr = r.data().begin() + j + 1;
-		for (int j2 = j + 1; j2 < n; ++j2) { // limit to only columns in separator
+		// A(i,j+1:end) -= ai*r(j+1:end)
-			//A(i,j2) -= ai*r(j2);
+		for (int j2 = j + 1; j2 < n; j2++,Aij++,rptr++)
-			*Aptr -= ai * *rptr;
+			*Aij -= ai * (*rptr);
 			Aptr++;
 			rptr++;
 		}
 	}
 }
@ -326,9 +323,7 @@ weighted_eliminate(Matrix& A, Vector& b, const Vector& sigmas) {
 	list<boost::tuple<Vector, double, double> > results;
 	Vector pseudo(m); // allocate storage for pseudo-inverse
-
+	Vector weights = reciprocal(emul(sigmas,sigmas)); // calculate weights once
 	// TODO: calculate weights once
 	Vector weights = reciprocal(emul(sigmas,sigmas));
 	// We loop over all columns, because the columns that can be eliminated
 	// are not necessarily contiguous. For each one, estimate the corresponding
@ -336,9 +331,7 @@ weighted_eliminate(Matrix& A, Vector& b, const Vector& sigmas) {
 	// Then update A and b by substituting x with d-rS, zero-ing out x's column.
 	for (int j=0; j<n; ++j) {
 		// extract the first column of A
-		// TODO: this is an allocate and a copy
+		Vector a(column(A, j)); // ublas::matrix_column is slower !
 		// can we somehow make a "reference" vector, boost magic
 		Vector a(column(A, j));
 		// Calculate weighted pseudo-inverse and corresponding precision
 		double precision = weightedPseudoinverse(a, weights, pseudo);
@ -349,7 +342,7 @@ weighted_eliminate(Matrix& A, Vector& b, const Vector& sigmas) {
 		// create solution and copy into r
 		Vector r(basis(n, j));
 		for (int j2=j+1; j2<n; ++j2)
-			r(j2) = inner_prod(pseudo, column(A, j2));
+			r(j2) = inner_prod(pseudo, ublas::matrix_column<Matrix>(A, j2));
 		// create the rhs
 		double d = inner_prod(pseudo, b);
--- a/cpp/timeGaussianFactorGraph.cpp
+++ b/cpp/timeGaussianFactorGraph.cpp
@ -54,6 +54,7 @@ TEST(timeGaussianFactorGraph, planar)
 	// 1741: 8.12, 8.12, 8.12, 8.14, 8.16
 	// 1742: 5.97, 5.97, 5.97, 5.99, 6.02
 	// 1746: 5.96, 5.96, 5.97, 6.00, 6.04
 	// 1748: 5.91, 5.92, 5.93, 5.95, 5.96
 	int N = 30;
 	double time = timePlanarSmoother(N); cout << time << endl;
 	DOUBLES_EQUAL(5.97,time,0.1);