Improved performance of updateAb in NoiseModel with GSL/ATLAS. Various other small optimizations were made.
parent
2183e09c67
commit
ac746ccead
124
cpp/Matrix.cpp
124
cpp/Matrix.cpp
|
@ -171,16 +171,19 @@ Vector column_(const Matrix& A, size_t j) {
|
|||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
Vector row(const Matrix& A, size_t i) {
|
||||
Vector row_(const Matrix& A, size_t i) {
|
||||
if (i>=A.size1())
|
||||
throw invalid_argument("Row index out of bounds!");
|
||||
|
||||
const double * Aptr = A.data().begin() + A.size2() * i;
|
||||
return Vector_(A.size2(), Aptr);
|
||||
|
||||
// TODO: improve this
|
||||
size_t n = A.size2();
|
||||
Vector a(n);
|
||||
for (size_t j=0; j<n; ++j)
|
||||
a(j) = A(i,j);
|
||||
return a;
|
||||
// size_t n = A.size2();
|
||||
// Vector a(n);
|
||||
// for (size_t j=0; j<n; ++j)
|
||||
// a(j) = A(i,j);
|
||||
// return a;
|
||||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
|
@ -322,8 +325,30 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) {
|
|||
// __attribute__ ((noinline)) // uncomment to prevent inlining when profiling
|
||||
static void updateAb(Matrix& A, Vector& b, int j, const Vector& a,
|
||||
const Vector& r, double d) {
|
||||
// TODO: do some optimization here if possible
|
||||
const size_t m = A.size1(), n = A.size2();
|
||||
#ifdef GSL
|
||||
|
||||
// update A
|
||||
// A(0:m,j+1:end) = A(0:m,j+1:end) - a(0:m)*r(j+1:end)'
|
||||
// get a view for A
|
||||
// gsl_matrix_view Ag = gsl_matrix_view_array(A.data().begin(), m, n);
|
||||
// gsl_matrix_view Ag_view = gsl_matrix_submatrix (&(Ag.matrix), 0, j+1, m, n-j-1);
|
||||
// // get a view for r
|
||||
// gsl_vector_const_view rg = gsl_vector_const_view_array(r.data().begin()+j+1, n-j-1);
|
||||
// // get a view for a
|
||||
// gsl_vector_const_view ag = gsl_vector_const_view_array(a.data().begin(), m);
|
||||
//
|
||||
// // rank one update
|
||||
// gsl_blas_dger (-1.0, &(ag.vector), &(rg.vector), &(Ag_view.matrix));
|
||||
//
|
||||
// // update b
|
||||
// double * bptr = b.data().begin();
|
||||
// const double * aptr = a.data().begin();
|
||||
// for (size_t i = 0; i < m; i++) {
|
||||
// *(bptr+i) -= d* *(aptr+i);
|
||||
// }
|
||||
|
||||
// original
|
||||
for (size_t i = 0; i < m; i++) { // update all rows
|
||||
double ai = a(i);
|
||||
b(i) -= ai * d;
|
||||
|
@ -333,6 +358,18 @@ static void updateAb(Matrix& A, Vector& b, int j, const Vector& a,
|
|||
for (size_t j2 = j + 1; j2 < n; j2++, Aij++, rptr++)
|
||||
*Aij -= ai * (*rptr);
|
||||
}
|
||||
|
||||
#else
|
||||
for (size_t i = 0; i < m; i++) { // update all rows
|
||||
double ai = a(i);
|
||||
b(i) -= ai * d;
|
||||
double *Aij = A.data().begin() + i * n + j + 1;
|
||||
const double *rptr = r.data().begin() + j + 1;
|
||||
// A(i,j+1:end) -= ai*r(j+1:end)
|
||||
for (size_t j2 = j + 1; j2 < n; j2++, Aij++, rptr++)
|
||||
*Aij -= ai * (*rptr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
|
@ -396,7 +433,54 @@ void householder_(Matrix &A, size_t k)
|
|||
{
|
||||
const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n));
|
||||
|
||||
// Original version
|
||||
#ifdef GSL
|
||||
// loop over the kprime first columns
|
||||
for(size_t j=0; j < kprime; j++){
|
||||
// below, the indices r,c always refer to original A
|
||||
|
||||
// copy column from matrix to xjm, i.e. x(j:m) = A(j:m,j)
|
||||
Vector xjm(m-j);
|
||||
for(size_t r = j ; r < m; r++)
|
||||
xjm(r-j) = A(r,j);
|
||||
|
||||
// calculate the Householder vector
|
||||
// COPIED IN: boost::tie(beta,vjm) = house(xjm);
|
||||
const double x0 = xjm(0);
|
||||
const double x02 = x0*x0;
|
||||
|
||||
const double sigma = inner_prod(trans(xjm),xjm) - x02;
|
||||
double beta = 0.0; Vector vjm(xjm); vjm(0) = 1.0;
|
||||
|
||||
if( sigma == 0.0 )
|
||||
beta = 0.0;
|
||||
else {
|
||||
double mu = sqrt(x02 + sigma);
|
||||
if( x0 <= 0.0 )
|
||||
vjm(0) = x0 - mu;
|
||||
else
|
||||
vjm(0) = -sigma / (x0 + mu);
|
||||
|
||||
const double v02 = vjm(0)*vjm(0);
|
||||
beta = 2.0 * v02 / (sigma + v02);
|
||||
vjm = vjm / vjm(0);
|
||||
}
|
||||
|
||||
// do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w'
|
||||
//householder_update(A, j, beta, vjm);
|
||||
|
||||
// inlined use GSL version
|
||||
gsl_vector_const_view v = gsl_vector_const_view_array(vjm.data().begin(), m-j);
|
||||
gsl_matrix_view Ag = gsl_matrix_view_array(A.data().begin(), m, n);
|
||||
gsl_matrix_view Ag_view = gsl_matrix_submatrix (&(Ag.matrix), j, 0, m-j, n);
|
||||
gsl_linalg_householder_hm (beta, &(v.vector), &(Ag_view.matrix));
|
||||
|
||||
// the Householder vector is copied in the zeroed out part
|
||||
for( size_t r = j+1 ; r < m ; r++ )
|
||||
A(r,j) = vjm(r-j);
|
||||
|
||||
} // column j
|
||||
|
||||
#else
|
||||
// loop over the kprime first columns
|
||||
for(size_t j=0; j < kprime; j++){
|
||||
// below, the indices r,c always refer to original A
|
||||
|
@ -411,13 +495,14 @@ void householder_(Matrix &A, size_t k)
|
|||
boost::tie(beta,vjm) = house(xjm);
|
||||
|
||||
// do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w'
|
||||
householder_update(A, j, beta, vjm) ;
|
||||
householder_update(A, j, beta, vjm);
|
||||
|
||||
// the Householder vector is copied in the zeroed out part
|
||||
for( size_t r = j+1 ; r < m ; r++ )
|
||||
A(r,j) = vjm(r-j);
|
||||
|
||||
} // column j
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
|
@ -530,27 +615,6 @@ Matrix collect(const std::vector<const Matrix *>& matrices, size_t m, size_t n)
|
|||
dimA2 += M->size2();
|
||||
}
|
||||
|
||||
// original version
|
||||
// Matrix A(dimA1, dimA2);
|
||||
// size_t hindex = 0;
|
||||
// BOOST_FOREACH(const Matrix* M, matrices) {
|
||||
// for(size_t d1 = 0; d1 < M->size1(); d1++)
|
||||
// for(size_t d2 = 0; d2 < M->size2(); d2++)
|
||||
// A(d1, d2+hindex) = (*M)(d1, d2);
|
||||
// hindex += M->size2();
|
||||
// }
|
||||
|
||||
// matrix_range version
|
||||
// Result: slower
|
||||
// Matrix A(dimA1, dimA2);
|
||||
// size_t hindex = 0;
|
||||
// BOOST_FOREACH(const Matrix* M, matrices) {
|
||||
// ublas::matrix_range<Matrix> mr(A, ublas::range(0, dimA1),
|
||||
// ublas::range(hindex, hindex+M->size2()));
|
||||
// noalias(mr) = *M;
|
||||
// hindex += M->size2();
|
||||
// }
|
||||
|
||||
// memcpy version
|
||||
Matrix A(dimA1, dimA2);
|
||||
double * Aptr = A.data().begin();
|
||||
|
|
|
@ -157,7 +157,7 @@ Vector column_(const Matrix& A, size_t j);
|
|||
* @param index of the row
|
||||
* @return the row in vector form
|
||||
*/
|
||||
Vector row(const Matrix& A, size_t j);
|
||||
Vector row_(const Matrix& A, size_t j);
|
||||
|
||||
// left multiply with scalar
|
||||
//inline Matrix operator*(double s, const Matrix& A) { return A*s;}
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
#include <typeinfo>
|
||||
#include <stdexcept>
|
||||
|
||||
#ifdef GSL
|
||||
#include <gsl/gsl_blas.h> // needed for gsl blas
|
||||
#include <gsl/gsl_linalg.h>
|
||||
#endif
|
||||
|
||||
#include <boost/numeric/ublas/lu.hpp>
|
||||
#include <boost/numeric/ublas/io.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
|
@ -37,6 +42,23 @@ namespace gtsam {
|
|||
// __attribute__ ((noinline)) // uncomment to prevent inlining when profiling
|
||||
static void updateAb(Matrix& Ab, int j, const Vector& a, const Vector& rd) {
|
||||
size_t m = Ab.size1(), n = Ab.size2()-1;
|
||||
#ifdef GSL
|
||||
// Ab(0:m,j+1:n) = Ab(0:m,j+1:n) - a(0:m)*rd(j+1:end)'
|
||||
// get a view for Ab
|
||||
gsl_matrix_view Abg = gsl_matrix_view_array(Ab.data().begin(), m, n+1);
|
||||
gsl_matrix_view Abg_view = gsl_matrix_submatrix (&(Abg.matrix), 0, j+1, m, n-j);
|
||||
|
||||
// get a view for a
|
||||
gsl_vector_const_view ag = gsl_vector_const_view_array(a.data().begin(), m);
|
||||
|
||||
// get a view for r
|
||||
gsl_vector_const_view rdg = gsl_vector_const_view_array(rd.data().begin()+j+1, n-j);
|
||||
|
||||
// rank one update
|
||||
gsl_blas_dger (-1.0, &(ag.vector), &(rdg.vector), &(Abg_view.matrix));
|
||||
|
||||
#else
|
||||
|
||||
for (int i = 0; i < m; i++) { // update all rows
|
||||
double ai = a(i);
|
||||
double *Aij = Ab.data().begin() + i * (n+1) + j + 1;
|
||||
|
@ -45,6 +67,7 @@ namespace gtsam {
|
|||
for (int j2 = j + 1; j2 < n+1; j2++, Aij++, rptr++)
|
||||
*Aij -= ai * (*rptr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
|
|
|
@ -18,6 +18,11 @@
|
|||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef GSL
|
||||
#include <gsl/gsl_blas.h> // needed for gsl blas
|
||||
#include <gsl/gsl_linalg.h>
|
||||
#endif
|
||||
|
||||
#include <boost/random/normal_distribution.hpp>
|
||||
#include <boost/random/variate_generator.hpp>
|
||||
|
||||
|
@ -232,9 +237,11 @@ namespace gtsam {
|
|||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
pair<double, Vector > house(Vector &x)
|
||||
{
|
||||
const double x02 = x(0)*x(0);
|
||||
pair<double, Vector > house(const Vector &x) {
|
||||
const double x0 = x(0);
|
||||
const double x02 = x0*x0;
|
||||
|
||||
// old code - GSL verison was actually a bit slower
|
||||
const double sigma = inner_prod(trans(x),x) - x02;
|
||||
double beta = 0.0;
|
||||
|
||||
|
@ -244,16 +251,15 @@ namespace gtsam {
|
|||
beta = 0.0;
|
||||
else {
|
||||
double mu = sqrt(x02 + sigma);
|
||||
if( x(0) <= 0.0 )
|
||||
v(0) = x(0) - mu;
|
||||
if( x0 <= 0.0 )
|
||||
v(0) = x0 - mu;
|
||||
else
|
||||
v(0) = -sigma / (x(0) + mu);
|
||||
v(0) = -sigma / (x0 + mu);
|
||||
|
||||
const double v02 = v(0)*v(0);
|
||||
beta = 2.0 * v02 / (sigma + v02);
|
||||
v = v / v(0);
|
||||
}
|
||||
|
||||
return make_pair(beta, v);
|
||||
}
|
||||
|
||||
|
|
|
@ -211,7 +211,7 @@ Vector operator/(double s, const Vector& v);
|
|||
* from x, such that the corresponding Householder reflection zeroes out
|
||||
* all but x.(j), j is base 0. Golub & Van Loan p 210.
|
||||
*/
|
||||
std::pair<double,Vector> house(Vector &x);
|
||||
std::pair<double,Vector> house(const Vector &x);
|
||||
|
||||
/**
|
||||
* Weighted Householder solution vector,
|
||||
|
|
|
@ -174,15 +174,15 @@ TEST( matrix, row )
|
|||
0., -1., 0., 1., 0., 0., 0.3,
|
||||
1., 0., 0., 0., -1., 0., 0.2,
|
||||
0., 1., 0., 0., 0., -1., -0.1);
|
||||
Vector a1 = row(A, 0);
|
||||
Vector a1 = row_(A, 0);
|
||||
Vector exp1 = Vector_(7, -1., 0., 1., 0., 0., 0., -0.2);
|
||||
CHECK(assert_equal(a1, exp1));
|
||||
|
||||
Vector a2 = row(A, 2);
|
||||
Vector a2 = row_(A, 2);
|
||||
Vector exp2 = Vector_(7, 1., 0., 0., 0., -1., 0., 0.2);
|
||||
CHECK(assert_equal(a2, exp2));
|
||||
|
||||
Vector a3 = row(A, 3);
|
||||
Vector a3 = row_(A, 3);
|
||||
Vector exp3 = Vector_(7, 0., 1., 0., 0., 0., -1., -0.1);
|
||||
CHECK(assert_equal(a3, exp3));
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ TEST(timeGaussianFactorGraph, linearTime)
|
|||
int T = 100000;
|
||||
double time1 = timeKalmanSmoother( T); cout << "timeKalmanSmoother( T): " << time1;
|
||||
double time2 = timeKalmanSmoother(2*T); cout << " (2*T): " << time2 << endl;
|
||||
DOUBLES_EQUAL(2*time1,time2,0.1);
|
||||
DOUBLES_EQUAL(2*time1,time2,0.2);
|
||||
}
|
||||
|
||||
/* ************************************************************************* */
|
||||
|
@ -71,9 +71,10 @@ TEST(timeGaussianFactorGraph, planar)
|
|||
// Alex's Machine
|
||||
// Initial:
|
||||
// 1907 (N = 30) : 0.14
|
||||
// (N = 100): 16.36
|
||||
// (N = 100) : 16.36
|
||||
// Improved (int->size_t)
|
||||
// (N = 100): 15.39
|
||||
// (N = 100) : 15.39
|
||||
// Using GSL/BLAS for updateAb : 12.87
|
||||
|
||||
// Switch to 100*100 grid = 10K poses
|
||||
// 1879: 15.6498 15.3851 15.5279
|
||||
|
|
|
@ -125,9 +125,12 @@ double timeVScaleRow(size_t m, size_t n, size_t reps) {
|
|||
|
||||
/**
|
||||
* Results:
|
||||
* Alex's Machine
|
||||
* Alex's Machine (reps = 200000)
|
||||
* - ublas matrix_column : 4.63 sec
|
||||
* - naive implementation : 4.70 sec
|
||||
*
|
||||
* reps = 2000000
|
||||
* -
|
||||
*/
|
||||
double timeColumn(size_t reps) {
|
||||
// create a matrix
|
||||
|
@ -144,8 +147,8 @@ double timeColumn(size_t reps) {
|
|||
boost::timer t;
|
||||
for (size_t i=0; i<reps; ++i)
|
||||
for (size_t j = 0; j<n; ++j)
|
||||
result = ublas::matrix_column<Matrix>(M, j);
|
||||
//result = column(M, j);
|
||||
//result = ublas::matrix_column<Matrix>(M, j);
|
||||
result = column_(M, j);
|
||||
elapsed = t.elapsed();
|
||||
}
|
||||
return elapsed;
|
||||
|
@ -154,10 +157,22 @@ double timeColumn(size_t reps) {
|
|||
/*
|
||||
* Results
|
||||
* Alex's machine
|
||||
*
|
||||
* Runs at reps = 500000
|
||||
* Baseline (no householder, just matrix copy) : 0.05 sec
|
||||
* Initial : 8.20 sec
|
||||
* All in one function : 7.89 sec
|
||||
* Replace householder update with GSL, ATLAS : 0.92 sec
|
||||
*
|
||||
* Runs at reps = 2000000
|
||||
* Baseline (GSL/ATLAS householder update) : 3.61 sec
|
||||
*
|
||||
* Runs at reps = 5000000
|
||||
* Baseline : 8.76 sec
|
||||
* GSL/Atlas version of updateAb : 9.03 sec // Why does this have an effect?
|
||||
* Inlining house() : 6.33 sec
|
||||
* Inlining householder_update [GSL] : 6.15 sec
|
||||
*
|
||||
*/
|
||||
double timeHouseholder(size_t reps) {
|
||||
// create a matrix
|
||||
|
@ -182,35 +197,35 @@ double timeHouseholder(size_t reps) {
|
|||
|
||||
int main(int argc, char ** argv) {
|
||||
|
||||
// // Time collect()
|
||||
// cout << "Starting Matrix::collect() Timing" << endl;
|
||||
// //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
|
||||
// size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
|
||||
// double collect_time1 = timeCollect(p, m, n, false, reps);
|
||||
// double collect_time2 = timeCollect(p, m, n, true, reps);
|
||||
// cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
|
||||
// cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl;
|
||||
//
|
||||
// // Time vector_scale_column
|
||||
// cout << "Starting Matrix::vector_scale(column) Timing" << endl;
|
||||
// size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000;
|
||||
// double vsColumn_time = timeVScaleColumn(m1, n1, reps1);
|
||||
// cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl;
|
||||
//
|
||||
// // Time vector_scale_row
|
||||
// cout << "Starting Matrix::vector_scale(row) Timing" << endl;
|
||||
// double vsRow_time = timeVScaleRow(m1, n1, reps1);
|
||||
// cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl;
|
||||
//
|
||||
// // Time column_() NOTE: using the ublas version
|
||||
// cout << "Starting column_() Timing" << endl;
|
||||
// size_t reps2 = 200000;
|
||||
// double column_time = timeColumn(reps2);
|
||||
// cout << "Time: " << column_time << " sec" << endl;
|
||||
// Time collect()
|
||||
cout << "Starting Matrix::collect() Timing" << endl;
|
||||
//size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
|
||||
size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
|
||||
double collect_time1 = timeCollect(p, m, n, false, reps);
|
||||
double collect_time2 = timeCollect(p, m, n, true, reps);
|
||||
cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
|
||||
cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl;
|
||||
|
||||
// Time vector_scale_column
|
||||
cout << "Starting Matrix::vector_scale(column) Timing" << endl;
|
||||
size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000;
|
||||
double vsColumn_time = timeVScaleColumn(m1, n1, reps1);
|
||||
cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl;
|
||||
|
||||
// Time vector_scale_row
|
||||
cout << "Starting Matrix::vector_scale(row) Timing" << endl;
|
||||
double vsRow_time = timeVScaleRow(m1, n1, reps1);
|
||||
cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl;
|
||||
|
||||
// Time column_() NOTE: using the ublas version
|
||||
cout << "Starting column_() Timing" << endl;
|
||||
size_t reps2 = 2000000;
|
||||
double column_time = timeColumn(reps2);
|
||||
cout << "Time: " << column_time << " sec" << endl;
|
||||
|
||||
// Time householder_ function
|
||||
cout << "Starting householder_() Timing" << endl;
|
||||
size_t reps_house = 500000;
|
||||
size_t reps_house = 5000000;
|
||||
double house_time = timeHouseholder(reps_house);
|
||||
cout << "Elapsed time for householder_() : " << house_time << " sec" << endl;
|
||||
|
||||
|
|
Loading…
Reference in New Issue