diff --git a/cpp/Matrix.cpp b/cpp/Matrix.cpp index 2ec6d9d32..8196d6ece 100644 --- a/cpp/Matrix.cpp +++ b/cpp/Matrix.cpp @@ -171,16 +171,19 @@ Vector column_(const Matrix& A, size_t j) { } /* ************************************************************************* */ -Vector row(const Matrix& A, size_t i) { +Vector row_(const Matrix& A, size_t i) { if (i>=A.size1()) throw invalid_argument("Row index out of bounds!"); + const double * Aptr = A.data().begin() + A.size2() * i; + return Vector_(A.size2(), Aptr); + // TODO: improve this - size_t n = A.size2(); - Vector a(n); - for (size_t j=0; j& matrices, size_t m, size_t n) dimA2 += M->size2(); } - // original version -// Matrix A(dimA1, dimA2); -// size_t hindex = 0; -// BOOST_FOREACH(const Matrix* M, matrices) { -// for(size_t d1 = 0; d1 < M->size1(); d1++) -// for(size_t d2 = 0; d2 < M->size2(); d2++) -// A(d1, d2+hindex) = (*M)(d1, d2); -// hindex += M->size2(); -// } - - // matrix_range version - // Result: slower -// Matrix A(dimA1, dimA2); -// size_t hindex = 0; -// BOOST_FOREACH(const Matrix* M, matrices) { -// ublas::matrix_range mr(A, ublas::range(0, dimA1), -// ublas::range(hindex, hindex+M->size2())); -// noalias(mr) = *M; -// hindex += M->size2(); -// } - // memcpy version Matrix A(dimA1, dimA2); double * Aptr = A.data().begin(); diff --git a/cpp/Matrix.h b/cpp/Matrix.h index 813e15d8e..5cdbd6e85 100644 --- a/cpp/Matrix.h +++ b/cpp/Matrix.h @@ -157,7 +157,7 @@ Vector column_(const Matrix& A, size_t j); * @param index of the row * @return the row in vector form */ -Vector row(const Matrix& A, size_t j); +Vector row_(const Matrix& A, size_t j); // left multiply with scalar //inline Matrix operator*(double s, const Matrix& A) { return A*s;} diff --git a/cpp/NoiseModel.cpp b/cpp/NoiseModel.cpp index ad6ca3e36..ab2c2312e 100644 --- a/cpp/NoiseModel.cpp +++ b/cpp/NoiseModel.cpp @@ -11,6 +11,11 @@ #include #include +#ifdef GSL +#include // needed for gsl blas +#include +#endif + #include #include #include @@ -37,6 +42,23 @@ namespace gtsam { // __attribute__ ((noinline)) // uncomment to prevent inlining when profiling static void updateAb(Matrix& Ab, int j, const Vector& a, const Vector& rd) { size_t m = Ab.size1(), n = Ab.size2()-1; +#ifdef GSL + // Ab(0:m,j+1:n) = Ab(0:m,j+1:n) - a(0:m)*rd(j+1:end)' + // get a view for Ab + gsl_matrix_view Abg = gsl_matrix_view_array(Ab.data().begin(), m, n+1); + gsl_matrix_view Abg_view = gsl_matrix_submatrix (&(Abg.matrix), 0, j+1, m, n-j); + + // get a view for a + gsl_vector_const_view ag = gsl_vector_const_view_array(a.data().begin(), m); + + // get a view for r + gsl_vector_const_view rdg = gsl_vector_const_view_array(rd.data().begin()+j+1, n-j); + + // rank one update + gsl_blas_dger (-1.0, &(ag.vector), &(rdg.vector), &(Abg_view.matrix)); + +#else + for (int i = 0; i < m; i++) { // update all rows double ai = a(i); double *Aij = Ab.data().begin() + i * (n+1) + j + 1; @@ -45,6 +67,7 @@ namespace gtsam { for (int j2 = j + 1; j2 < n+1; j2++, Aij++, rptr++) *Aij -= ai * (*rptr); } +#endif } /* ************************************************************************* */ diff --git a/cpp/Vector.cpp b/cpp/Vector.cpp index 9149e62d2..537c4e1a8 100644 --- a/cpp/Vector.cpp +++ b/cpp/Vector.cpp @@ -18,6 +18,11 @@ #include #endif +#ifdef GSL +#include // needed for gsl blas +#include +#endif + #include #include @@ -232,29 +237,30 @@ namespace gtsam { } /* ************************************************************************* */ - pair house(Vector &x) - { - const double x02 = x(0)*x(0); - const double sigma = inner_prod(trans(x),x) - x02; - double beta = 0.0; - - Vector v(x); v(0) = 1.0; - - if( sigma == 0.0 ) - beta = 0.0; - else { - double mu = sqrt(x02 + sigma); - if( x(0) <= 0.0 ) - v(0) = x(0) - mu; - else - v(0) = -sigma / (x(0) + mu); - - const double v02 = v(0)*v(0); - beta = 2.0 * v02 / (sigma + v02); - v = v / v(0); - } - - return make_pair(beta, v); + pair house(const Vector &x) { + const double x0 = x(0); + const double x02 = x0*x0; + + // old code - GSL verison was actually a bit slower + const double sigma = inner_prod(trans(x),x) - x02; + double beta = 0.0; + + Vector v(x); v(0) = 1.0; + + if( sigma == 0.0 ) + beta = 0.0; + else { + double mu = sqrt(x02 + sigma); + if( x0 <= 0.0 ) + v(0) = x0 - mu; + else + v(0) = -sigma / (x0 + mu); + + const double v02 = v(0)*v(0); + beta = 2.0 * v02 / (sigma + v02); + v = v / v(0); + } + return make_pair(beta, v); } /* ************************************************************************* */ diff --git a/cpp/Vector.h b/cpp/Vector.h index 6a02d1312..374d940cc 100644 --- a/cpp/Vector.h +++ b/cpp/Vector.h @@ -211,7 +211,7 @@ Vector operator/(double s, const Vector& v); * from x, such that the corresponding Householder reflection zeroes out * all but x.(j), j is base 0. Golub & Van Loan p 210. */ -std::pair house(Vector &x); +std::pair house(const Vector &x); /** * Weighted Householder solution vector, diff --git a/cpp/testMatrix.cpp b/cpp/testMatrix.cpp index 894d974dc..4971c9b64 100644 --- a/cpp/testMatrix.cpp +++ b/cpp/testMatrix.cpp @@ -174,15 +174,15 @@ TEST( matrix, row ) 0., -1., 0., 1., 0., 0., 0.3, 1., 0., 0., 0., -1., 0., 0.2, 0., 1., 0., 0., 0., -1., -0.1); - Vector a1 = row(A, 0); + Vector a1 = row_(A, 0); Vector exp1 = Vector_(7, -1., 0., 1., 0., 0., 0., -0.2); CHECK(assert_equal(a1, exp1)); - Vector a2 = row(A, 2); + Vector a2 = row_(A, 2); Vector exp2 = Vector_(7, 1., 0., 0., 0., -1., 0., 0.2); CHECK(assert_equal(a2, exp2)); - Vector a3 = row(A, 3); + Vector a3 = row_(A, 3); Vector exp3 = Vector_(7, 0., 1., 0., 0., 0., -1., -0.1); CHECK(assert_equal(a3, exp3)); } diff --git a/cpp/timeGaussianFactorGraph.cpp b/cpp/timeGaussianFactorGraph.cpp index c71ca036b..c2cee3b49 100644 --- a/cpp/timeGaussianFactorGraph.cpp +++ b/cpp/timeGaussianFactorGraph.cpp @@ -55,7 +55,7 @@ TEST(timeGaussianFactorGraph, linearTime) int T = 100000; double time1 = timeKalmanSmoother( T); cout << "timeKalmanSmoother( T): " << time1; double time2 = timeKalmanSmoother(2*T); cout << " (2*T): " << time2 << endl; - DOUBLES_EQUAL(2*time1,time2,0.1); + DOUBLES_EQUAL(2*time1,time2,0.2); } /* ************************************************************************* */ @@ -70,10 +70,11 @@ TEST(timeGaussianFactorGraph, planar) // Alex's Machine // Initial: - // 1907 (N = 30) : 0.14 - // (N = 100): 16.36 + // 1907 (N = 30) : 0.14 + // (N = 100) : 16.36 // Improved (int->size_t) - // (N = 100): 15.39 + // (N = 100) : 15.39 + // Using GSL/BLAS for updateAb : 12.87 // Switch to 100*100 grid = 10K poses // 1879: 15.6498 15.3851 15.5279 diff --git a/cpp/timeMatrix.cpp b/cpp/timeMatrix.cpp index 7ac4f3216..adc89df20 100644 --- a/cpp/timeMatrix.cpp +++ b/cpp/timeMatrix.cpp @@ -125,9 +125,12 @@ double timeVScaleRow(size_t m, size_t n, size_t reps) { /** * Results: - * Alex's Machine + * Alex's Machine (reps = 200000) * - ublas matrix_column : 4.63 sec * - naive implementation : 4.70 sec + * + * reps = 2000000 + * - */ double timeColumn(size_t reps) { // create a matrix @@ -144,8 +147,8 @@ double timeColumn(size_t reps) { boost::timer t; for (size_t i=0; i(M, j); - //result = column(M, j); + //result = ublas::matrix_column(M, j); + result = column_(M, j); elapsed = t.elapsed(); } return elapsed; @@ -154,10 +157,22 @@ double timeColumn(size_t reps) { /* * Results * Alex's machine + * + * Runs at reps = 500000 * Baseline (no householder, just matrix copy) : 0.05 sec * Initial : 8.20 sec * All in one function : 7.89 sec * Replace householder update with GSL, ATLAS : 0.92 sec + * + * Runs at reps = 2000000 + * Baseline (GSL/ATLAS householder update) : 3.61 sec + * + * Runs at reps = 5000000 + * Baseline : 8.76 sec + * GSL/Atlas version of updateAb : 9.03 sec // Why does this have an effect? + * Inlining house() : 6.33 sec + * Inlining householder_update [GSL] : 6.15 sec + * */ double timeHouseholder(size_t reps) { // create a matrix @@ -182,35 +197,35 @@ double timeHouseholder(size_t reps) { int main(int argc, char ** argv) { -// // Time collect() -// cout << "Starting Matrix::collect() Timing" << endl; -// //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; -// size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; -// double collect_time1 = timeCollect(p, m, n, false, reps); -// double collect_time2 = timeCollect(p, m, n, true, reps); -// cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; -// cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; -// -// // Time vector_scale_column -// cout << "Starting Matrix::vector_scale(column) Timing" << endl; -// size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; -// double vsColumn_time = timeVScaleColumn(m1, n1, reps1); -// cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; -// -// // Time vector_scale_row -// cout << "Starting Matrix::vector_scale(row) Timing" << endl; -// double vsRow_time = timeVScaleRow(m1, n1, reps1); -// cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; -// -// // Time column_() NOTE: using the ublas version -// cout << "Starting column_() Timing" << endl; -// size_t reps2 = 200000; -// double column_time = timeColumn(reps2); -// cout << "Time: " << column_time << " sec" << endl; + // Time collect() + cout << "Starting Matrix::collect() Timing" << endl; + //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; + size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; + double collect_time1 = timeCollect(p, m, n, false, reps); + double collect_time2 = timeCollect(p, m, n, true, reps); + cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; + cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; + + // Time vector_scale_column + cout << "Starting Matrix::vector_scale(column) Timing" << endl; + size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; + double vsColumn_time = timeVScaleColumn(m1, n1, reps1); + cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; + + // Time vector_scale_row + cout << "Starting Matrix::vector_scale(row) Timing" << endl; + double vsRow_time = timeVScaleRow(m1, n1, reps1); + cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; + + // Time column_() NOTE: using the ublas version + cout << "Starting column_() Timing" << endl; + size_t reps2 = 2000000; + double column_time = timeColumn(reps2); + cout << "Time: " << column_time << " sec" << endl; // Time householder_ function cout << "Starting householder_() Timing" << endl; - size_t reps_house = 500000; + size_t reps_house = 5000000; double house_time = timeHouseholder(reps_house); cout << "Elapsed time for householder_() : " << house_time << " sec" << endl;