Improved performance of updateAb in NoiseModel with GSL/ATLAS. Various other small optimizations were made.

release/4.3a0
Alex Cunningham 2010-01-21 18:51:59 +00:00
parent 2183e09c67
commit ac746ccead
8 changed files with 200 additions and 91 deletions

View File

@ -171,16 +171,19 @@ Vector column_(const Matrix& A, size_t j) {
} }
/* ************************************************************************* */ /* ************************************************************************* */
Vector row(const Matrix& A, size_t i) { Vector row_(const Matrix& A, size_t i) {
if (i>=A.size1()) if (i>=A.size1())
throw invalid_argument("Row index out of bounds!"); throw invalid_argument("Row index out of bounds!");
const double * Aptr = A.data().begin() + A.size2() * i;
return Vector_(A.size2(), Aptr);
// TODO: improve this // TODO: improve this
size_t n = A.size2(); // size_t n = A.size2();
Vector a(n); // Vector a(n);
for (size_t j=0; j<n; ++j) // for (size_t j=0; j<n; ++j)
a(j) = A(i,j); // a(j) = A(i,j);
return a; // return a;
} }
/* ************************************************************************* */ /* ************************************************************************* */
@ -322,8 +325,30 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) {
// __attribute__ ((noinline)) // uncomment to prevent inlining when profiling // __attribute__ ((noinline)) // uncomment to prevent inlining when profiling
static void updateAb(Matrix& A, Vector& b, int j, const Vector& a, static void updateAb(Matrix& A, Vector& b, int j, const Vector& a,
const Vector& r, double d) { const Vector& r, double d) {
// TODO: do some optimization here if possible
const size_t m = A.size1(), n = A.size2(); const size_t m = A.size1(), n = A.size2();
#ifdef GSL
// update A
// A(0:m,j+1:end) = A(0:m,j+1:end) - a(0:m)*r(j+1:end)'
// get a view for A
// gsl_matrix_view Ag = gsl_matrix_view_array(A.data().begin(), m, n);
// gsl_matrix_view Ag_view = gsl_matrix_submatrix (&(Ag.matrix), 0, j+1, m, n-j-1);
// // get a view for r
// gsl_vector_const_view rg = gsl_vector_const_view_array(r.data().begin()+j+1, n-j-1);
// // get a view for a
// gsl_vector_const_view ag = gsl_vector_const_view_array(a.data().begin(), m);
//
// // rank one update
// gsl_blas_dger (-1.0, &(ag.vector), &(rg.vector), &(Ag_view.matrix));
//
// // update b
// double * bptr = b.data().begin();
// const double * aptr = a.data().begin();
// for (size_t i = 0; i < m; i++) {
// *(bptr+i) -= d* *(aptr+i);
// }
// original
for (size_t i = 0; i < m; i++) { // update all rows for (size_t i = 0; i < m; i++) { // update all rows
double ai = a(i); double ai = a(i);
b(i) -= ai * d; b(i) -= ai * d;
@ -333,6 +358,18 @@ static void updateAb(Matrix& A, Vector& b, int j, const Vector& a,
for (size_t j2 = j + 1; j2 < n; j2++, Aij++, rptr++) for (size_t j2 = j + 1; j2 < n; j2++, Aij++, rptr++)
*Aij -= ai * (*rptr); *Aij -= ai * (*rptr);
} }
#else
for (size_t i = 0; i < m; i++) { // update all rows
double ai = a(i);
b(i) -= ai * d;
double *Aij = A.data().begin() + i * n + j + 1;
const double *rptr = r.data().begin() + j + 1;
// A(i,j+1:end) -= ai*r(j+1:end)
for (size_t j2 = j + 1; j2 < n; j2++, Aij++, rptr++)
*Aij -= ai * (*rptr);
}
#endif
} }
/* ************************************************************************* */ /* ************************************************************************* */
@ -396,7 +433,54 @@ void householder_(Matrix &A, size_t k)
{ {
const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n)); const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n));
// Original version #ifdef GSL
// loop over the kprime first columns
for(size_t j=0; j < kprime; j++){
// below, the indices r,c always refer to original A
// copy column from matrix to xjm, i.e. x(j:m) = A(j:m,j)
Vector xjm(m-j);
for(size_t r = j ; r < m; r++)
xjm(r-j) = A(r,j);
// calculate the Householder vector
// COPIED IN: boost::tie(beta,vjm) = house(xjm);
const double x0 = xjm(0);
const double x02 = x0*x0;
const double sigma = inner_prod(trans(xjm),xjm) - x02;
double beta = 0.0; Vector vjm(xjm); vjm(0) = 1.0;
if( sigma == 0.0 )
beta = 0.0;
else {
double mu = sqrt(x02 + sigma);
if( x0 <= 0.0 )
vjm(0) = x0 - mu;
else
vjm(0) = -sigma / (x0 + mu);
const double v02 = vjm(0)*vjm(0);
beta = 2.0 * v02 / (sigma + v02);
vjm = vjm / vjm(0);
}
// do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w'
//householder_update(A, j, beta, vjm);
// inlined use GSL version
gsl_vector_const_view v = gsl_vector_const_view_array(vjm.data().begin(), m-j);
gsl_matrix_view Ag = gsl_matrix_view_array(A.data().begin(), m, n);
gsl_matrix_view Ag_view = gsl_matrix_submatrix (&(Ag.matrix), j, 0, m-j, n);
gsl_linalg_householder_hm (beta, &(v.vector), &(Ag_view.matrix));
// the Householder vector is copied in the zeroed out part
for( size_t r = j+1 ; r < m ; r++ )
A(r,j) = vjm(r-j);
} // column j
#else
// loop over the kprime first columns // loop over the kprime first columns
for(size_t j=0; j < kprime; j++){ for(size_t j=0; j < kprime; j++){
// below, the indices r,c always refer to original A // below, the indices r,c always refer to original A
@ -411,13 +495,14 @@ void householder_(Matrix &A, size_t k)
boost::tie(beta,vjm) = house(xjm); boost::tie(beta,vjm) = house(xjm);
// do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w' // do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w'
householder_update(A, j, beta, vjm) ; householder_update(A, j, beta, vjm);
// the Householder vector is copied in the zeroed out part // the Householder vector is copied in the zeroed out part
for( size_t r = j+1 ; r < m ; r++ ) for( size_t r = j+1 ; r < m ; r++ )
A(r,j) = vjm(r-j); A(r,j) = vjm(r-j);
} // column j } // column j
#endif
} }
/* ************************************************************************* */ /* ************************************************************************* */
@ -530,27 +615,6 @@ Matrix collect(const std::vector<const Matrix *>& matrices, size_t m, size_t n)
dimA2 += M->size2(); dimA2 += M->size2();
} }
// original version
// Matrix A(dimA1, dimA2);
// size_t hindex = 0;
// BOOST_FOREACH(const Matrix* M, matrices) {
// for(size_t d1 = 0; d1 < M->size1(); d1++)
// for(size_t d2 = 0; d2 < M->size2(); d2++)
// A(d1, d2+hindex) = (*M)(d1, d2);
// hindex += M->size2();
// }
// matrix_range version
// Result: slower
// Matrix A(dimA1, dimA2);
// size_t hindex = 0;
// BOOST_FOREACH(const Matrix* M, matrices) {
// ublas::matrix_range<Matrix> mr(A, ublas::range(0, dimA1),
// ublas::range(hindex, hindex+M->size2()));
// noalias(mr) = *M;
// hindex += M->size2();
// }
// memcpy version // memcpy version
Matrix A(dimA1, dimA2); Matrix A(dimA1, dimA2);
double * Aptr = A.data().begin(); double * Aptr = A.data().begin();

View File

@ -157,7 +157,7 @@ Vector column_(const Matrix& A, size_t j);
* @param index of the row * @param index of the row
* @return the row in vector form * @return the row in vector form
*/ */
Vector row(const Matrix& A, size_t j); Vector row_(const Matrix& A, size_t j);
// left multiply with scalar // left multiply with scalar
//inline Matrix operator*(double s, const Matrix& A) { return A*s;} //inline Matrix operator*(double s, const Matrix& A) { return A*s;}

View File

@ -11,6 +11,11 @@
#include <typeinfo> #include <typeinfo>
#include <stdexcept> #include <stdexcept>
#ifdef GSL
#include <gsl/gsl_blas.h> // needed for gsl blas
#include <gsl/gsl_linalg.h>
#endif
#include <boost/numeric/ublas/lu.hpp> #include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/io.hpp> #include <boost/numeric/ublas/io.hpp>
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
@ -37,6 +42,23 @@ namespace gtsam {
// __attribute__ ((noinline)) // uncomment to prevent inlining when profiling // __attribute__ ((noinline)) // uncomment to prevent inlining when profiling
static void updateAb(Matrix& Ab, int j, const Vector& a, const Vector& rd) { static void updateAb(Matrix& Ab, int j, const Vector& a, const Vector& rd) {
size_t m = Ab.size1(), n = Ab.size2()-1; size_t m = Ab.size1(), n = Ab.size2()-1;
#ifdef GSL
// Ab(0:m,j+1:n) = Ab(0:m,j+1:n) - a(0:m)*rd(j+1:end)'
// get a view for Ab
gsl_matrix_view Abg = gsl_matrix_view_array(Ab.data().begin(), m, n+1);
gsl_matrix_view Abg_view = gsl_matrix_submatrix (&(Abg.matrix), 0, j+1, m, n-j);
// get a view for a
gsl_vector_const_view ag = gsl_vector_const_view_array(a.data().begin(), m);
// get a view for r
gsl_vector_const_view rdg = gsl_vector_const_view_array(rd.data().begin()+j+1, n-j);
// rank one update
gsl_blas_dger (-1.0, &(ag.vector), &(rdg.vector), &(Abg_view.matrix));
#else
for (int i = 0; i < m; i++) { // update all rows for (int i = 0; i < m; i++) { // update all rows
double ai = a(i); double ai = a(i);
double *Aij = Ab.data().begin() + i * (n+1) + j + 1; double *Aij = Ab.data().begin() + i * (n+1) + j + 1;
@ -45,6 +67,7 @@ namespace gtsam {
for (int j2 = j + 1; j2 < n+1; j2++, Aij++, rptr++) for (int j2 = j + 1; j2 < n+1; j2++, Aij++, rptr++)
*Aij -= ai * (*rptr); *Aij -= ai * (*rptr);
} }
#endif
} }
/* ************************************************************************* */ /* ************************************************************************* */

View File

@ -18,6 +18,11 @@
#include <Windows.h> #include <Windows.h>
#endif #endif
#ifdef GSL
#include <gsl/gsl_blas.h> // needed for gsl blas
#include <gsl/gsl_linalg.h>
#endif
#include <boost/random/normal_distribution.hpp> #include <boost/random/normal_distribution.hpp>
#include <boost/random/variate_generator.hpp> #include <boost/random/variate_generator.hpp>
@ -232,29 +237,30 @@ namespace gtsam {
} }
/* ************************************************************************* */ /* ************************************************************************* */
pair<double, Vector > house(Vector &x) pair<double, Vector > house(const Vector &x) {
{ const double x0 = x(0);
const double x02 = x(0)*x(0); const double x02 = x0*x0;
const double sigma = inner_prod(trans(x),x) - x02;
double beta = 0.0; // old code - GSL verison was actually a bit slower
const double sigma = inner_prod(trans(x),x) - x02;
Vector v(x); v(0) = 1.0; double beta = 0.0;
if( sigma == 0.0 ) Vector v(x); v(0) = 1.0;
beta = 0.0;
else { if( sigma == 0.0 )
double mu = sqrt(x02 + sigma); beta = 0.0;
if( x(0) <= 0.0 ) else {
v(0) = x(0) - mu; double mu = sqrt(x02 + sigma);
else if( x0 <= 0.0 )
v(0) = -sigma / (x(0) + mu); v(0) = x0 - mu;
else
const double v02 = v(0)*v(0); v(0) = -sigma / (x0 + mu);
beta = 2.0 * v02 / (sigma + v02);
v = v / v(0); const double v02 = v(0)*v(0);
} beta = 2.0 * v02 / (sigma + v02);
v = v / v(0);
return make_pair(beta, v); }
return make_pair(beta, v);
} }
/* ************************************************************************* */ /* ************************************************************************* */

View File

@ -211,7 +211,7 @@ Vector operator/(double s, const Vector& v);
* from x, such that the corresponding Householder reflection zeroes out * from x, such that the corresponding Householder reflection zeroes out
* all but x.(j), j is base 0. Golub & Van Loan p 210. * all but x.(j), j is base 0. Golub & Van Loan p 210.
*/ */
std::pair<double,Vector> house(Vector &x); std::pair<double,Vector> house(const Vector &x);
/** /**
* Weighted Householder solution vector, * Weighted Householder solution vector,

View File

@ -174,15 +174,15 @@ TEST( matrix, row )
0., -1., 0., 1., 0., 0., 0.3, 0., -1., 0., 1., 0., 0., 0.3,
1., 0., 0., 0., -1., 0., 0.2, 1., 0., 0., 0., -1., 0., 0.2,
0., 1., 0., 0., 0., -1., -0.1); 0., 1., 0., 0., 0., -1., -0.1);
Vector a1 = row(A, 0); Vector a1 = row_(A, 0);
Vector exp1 = Vector_(7, -1., 0., 1., 0., 0., 0., -0.2); Vector exp1 = Vector_(7, -1., 0., 1., 0., 0., 0., -0.2);
CHECK(assert_equal(a1, exp1)); CHECK(assert_equal(a1, exp1));
Vector a2 = row(A, 2); Vector a2 = row_(A, 2);
Vector exp2 = Vector_(7, 1., 0., 0., 0., -1., 0., 0.2); Vector exp2 = Vector_(7, 1., 0., 0., 0., -1., 0., 0.2);
CHECK(assert_equal(a2, exp2)); CHECK(assert_equal(a2, exp2));
Vector a3 = row(A, 3); Vector a3 = row_(A, 3);
Vector exp3 = Vector_(7, 0., 1., 0., 0., 0., -1., -0.1); Vector exp3 = Vector_(7, 0., 1., 0., 0., 0., -1., -0.1);
CHECK(assert_equal(a3, exp3)); CHECK(assert_equal(a3, exp3));
} }

View File

@ -55,7 +55,7 @@ TEST(timeGaussianFactorGraph, linearTime)
int T = 100000; int T = 100000;
double time1 = timeKalmanSmoother( T); cout << "timeKalmanSmoother( T): " << time1; double time1 = timeKalmanSmoother( T); cout << "timeKalmanSmoother( T): " << time1;
double time2 = timeKalmanSmoother(2*T); cout << " (2*T): " << time2 << endl; double time2 = timeKalmanSmoother(2*T); cout << " (2*T): " << time2 << endl;
DOUBLES_EQUAL(2*time1,time2,0.1); DOUBLES_EQUAL(2*time1,time2,0.2);
} }
/* ************************************************************************* */ /* ************************************************************************* */
@ -70,10 +70,11 @@ TEST(timeGaussianFactorGraph, planar)
// Alex's Machine // Alex's Machine
// Initial: // Initial:
// 1907 (N = 30) : 0.14 // 1907 (N = 30) : 0.14
// (N = 100): 16.36 // (N = 100) : 16.36
// Improved (int->size_t) // Improved (int->size_t)
// (N = 100): 15.39 // (N = 100) : 15.39
// Using GSL/BLAS for updateAb : 12.87
// Switch to 100*100 grid = 10K poses // Switch to 100*100 grid = 10K poses
// 1879: 15.6498 15.3851 15.5279 // 1879: 15.6498 15.3851 15.5279

View File

@ -125,9 +125,12 @@ double timeVScaleRow(size_t m, size_t n, size_t reps) {
/** /**
* Results: * Results:
* Alex's Machine * Alex's Machine (reps = 200000)
* - ublas matrix_column : 4.63 sec * - ublas matrix_column : 4.63 sec
* - naive implementation : 4.70 sec * - naive implementation : 4.70 sec
*
* reps = 2000000
* -
*/ */
double timeColumn(size_t reps) { double timeColumn(size_t reps) {
// create a matrix // create a matrix
@ -144,8 +147,8 @@ double timeColumn(size_t reps) {
boost::timer t; boost::timer t;
for (size_t i=0; i<reps; ++i) for (size_t i=0; i<reps; ++i)
for (size_t j = 0; j<n; ++j) for (size_t j = 0; j<n; ++j)
result = ublas::matrix_column<Matrix>(M, j); //result = ublas::matrix_column<Matrix>(M, j);
//result = column(M, j); result = column_(M, j);
elapsed = t.elapsed(); elapsed = t.elapsed();
} }
return elapsed; return elapsed;
@ -154,10 +157,22 @@ double timeColumn(size_t reps) {
/* /*
* Results * Results
* Alex's machine * Alex's machine
*
* Runs at reps = 500000
* Baseline (no householder, just matrix copy) : 0.05 sec * Baseline (no householder, just matrix copy) : 0.05 sec
* Initial : 8.20 sec * Initial : 8.20 sec
* All in one function : 7.89 sec * All in one function : 7.89 sec
* Replace householder update with GSL, ATLAS : 0.92 sec * Replace householder update with GSL, ATLAS : 0.92 sec
*
* Runs at reps = 2000000
* Baseline (GSL/ATLAS householder update) : 3.61 sec
*
* Runs at reps = 5000000
* Baseline : 8.76 sec
* GSL/Atlas version of updateAb : 9.03 sec // Why does this have an effect?
* Inlining house() : 6.33 sec
* Inlining householder_update [GSL] : 6.15 sec
*
*/ */
double timeHouseholder(size_t reps) { double timeHouseholder(size_t reps) {
// create a matrix // create a matrix
@ -182,35 +197,35 @@ double timeHouseholder(size_t reps) {
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
// // Time collect() // Time collect()
// cout << "Starting Matrix::collect() Timing" << endl; cout << "Starting Matrix::collect() Timing" << endl;
// //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
// size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
// double collect_time1 = timeCollect(p, m, n, false, reps); double collect_time1 = timeCollect(p, m, n, false, reps);
// double collect_time2 = timeCollect(p, m, n, true, reps); double collect_time2 = timeCollect(p, m, n, true, reps);
// cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
// cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl;
//
// // Time vector_scale_column // Time vector_scale_column
// cout << "Starting Matrix::vector_scale(column) Timing" << endl; cout << "Starting Matrix::vector_scale(column) Timing" << endl;
// size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000;
// double vsColumn_time = timeVScaleColumn(m1, n1, reps1); double vsColumn_time = timeVScaleColumn(m1, n1, reps1);
// cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl;
//
// // Time vector_scale_row // Time vector_scale_row
// cout << "Starting Matrix::vector_scale(row) Timing" << endl; cout << "Starting Matrix::vector_scale(row) Timing" << endl;
// double vsRow_time = timeVScaleRow(m1, n1, reps1); double vsRow_time = timeVScaleRow(m1, n1, reps1);
// cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl;
//
// // Time column_() NOTE: using the ublas version // Time column_() NOTE: using the ublas version
// cout << "Starting column_() Timing" << endl; cout << "Starting column_() Timing" << endl;
// size_t reps2 = 200000; size_t reps2 = 2000000;
// double column_time = timeColumn(reps2); double column_time = timeColumn(reps2);
// cout << "Time: " << column_time << " sec" << endl; cout << "Time: " << column_time << " sec" << endl;
// Time householder_ function // Time householder_ function
cout << "Starting householder_() Timing" << endl; cout << "Starting householder_() Timing" << endl;
size_t reps_house = 500000; size_t reps_house = 5000000;
double house_time = timeHouseholder(reps_house); double house_time = timeHouseholder(reps_house);
cout << "Elapsed time for householder_() : " << house_time << " sec" << endl; cout << "Elapsed time for householder_() : " << house_time << " sec" << endl;