278 lines
7.5 KiB
C++
278 lines
7.5 KiB
C++
/* ----------------------------------------------------------------------------
|
|
|
|
* GTSAM Copyright 2010, Georgia Tech Research Corporation,
|
|
* Atlanta, Georgia 30332-0415
|
|
* All Rights Reserved
|
|
* Authors: Frank Dellaert, et al. (see THANKS for the full author list)
|
|
|
|
* See LICENSE for the license information
|
|
|
|
* -------------------------------------------------------------------------- */
|
|
|
|
/*
|
|
* @file timeMatrix.cpp
|
|
* @brief Performs timing and profiling for Matrix operations
|
|
* @author Alex Cunningham
|
|
*/
|
|
|
|
#include <iostream>
|
|
#include <boost/timer.hpp>
|
|
#include <gtsam/base/Matrix.h>
|
|
|
|
using namespace std;
|
|
using namespace gtsam;
|
|
|
|
/*
|
|
* Results:
|
|
* Alex's Machine:
|
|
* (using p = 100000 m = 10 n = 12 reps = 50) - Average times
|
|
* - (1st pass of simple changes) no pass: 0.184 sec , pass: 0.181 sec
|
|
* - (1st rev memcpy) no pass: 0.181 sec , pass: 0.180 sec
|
|
* - (1st rev matrix_range) no pass: 0.186 sec , pass: 0.184 sec
|
|
* (using p = 10 m = 10 n = 12 reps = 10000000)
|
|
* - (matrix_range version) no pass: 24.21 sec , pass: 23.97 sec
|
|
* - (memcpy version) no pass: 18.96 sec , pass: 18.39 sec
|
|
* - (original version) no pass: 23.45 sec , pass: 22.80 sec
|
|
* - rev 2100 no pass: 18.45 sec , pass: 18.35 sec
|
|
*/
|
|
double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) {
|
|
// create a large number of matrices
|
|
// p = number of matrices
|
|
// m = rows per matrix
|
|
// n = columns per matrix
|
|
// reps = number of repetitions
|
|
|
|
// fill the matrices with identities
|
|
vector<const Matrix *> matrices;
|
|
for (size_t i=0; i<p;++i) {
|
|
Matrix * M = new Matrix;
|
|
(*M) = eye(m,n);
|
|
matrices.push_back(M);
|
|
}
|
|
|
|
// start timing
|
|
Matrix result;
|
|
double elapsed;
|
|
{
|
|
boost::timer t;
|
|
|
|
if (passDims)
|
|
for (size_t i=0; i<reps; ++i)
|
|
result = collect(matrices, m, n);
|
|
else
|
|
for (size_t i=0; i<reps; ++i)
|
|
result = collect(matrices);
|
|
elapsed = t.elapsed();
|
|
}
|
|
// delete the matrices
|
|
for (size_t i=0; i<p;++i) {
|
|
delete matrices[i];
|
|
}
|
|
|
|
return elapsed;
|
|
//return elapsed/reps;
|
|
}
|
|
|
|
/*
|
|
* Results:
|
|
* Alex's Machine:
|
|
* - Original : 0.60 sec (x1000)
|
|
* - 1st Rev : 0.49 sec (x1000)
|
|
* - rev 2100 : 0.52 sec (x1000)
|
|
*/
|
|
double timeVScaleColumn(size_t m, size_t n, size_t reps) {
|
|
// make a matrix to scale
|
|
Matrix M(m, n);
|
|
for (size_t i=0; i<m; ++i)
|
|
for (size_t j=0; j<n; ++j)
|
|
M(i,j) = 2*i+j;
|
|
|
|
// make a vector to use for scaling
|
|
Vector V(m);
|
|
for (size_t i=0; i<m; ++i)
|
|
V(i) = i*2;
|
|
|
|
double elapsed;
|
|
Matrix result;
|
|
{
|
|
boost::timer t;
|
|
for (size_t i=0; i<reps; ++i)
|
|
Matrix result = vector_scale(M,V);
|
|
elapsed = t.elapsed();
|
|
}
|
|
|
|
return elapsed;
|
|
}
|
|
|
|
/*
|
|
* Results:
|
|
* Alex's Machine:
|
|
* - Original : 0.54 sec (x1000)
|
|
* - 1st rev : 0.44 sec (x1000)
|
|
* - rev 2100 : 1.69 sec (x1000)
|
|
*/
|
|
double timeVScaleRow(size_t m, size_t n, size_t reps) {
|
|
// make a matrix to scale
|
|
Matrix M(m, n);
|
|
for (size_t i=0; i<m; ++i)
|
|
for (size_t j=0; j<n; ++j)
|
|
M(i,j) = 2*i+j;
|
|
|
|
// make a vector to use for scaling
|
|
Vector V(n);
|
|
for (size_t i=0; i<n; ++i)
|
|
V(i) = i*2;
|
|
|
|
double elapsed;
|
|
Matrix result;
|
|
{
|
|
boost::timer t;
|
|
for (size_t i=0; i<reps; ++i)
|
|
result = vector_scale(V,M);
|
|
elapsed = t.elapsed();
|
|
}
|
|
|
|
return elapsed;
|
|
}
|
|
|
|
/**
|
|
* Results:
|
|
* Alex's Machine (reps = 200000)
|
|
* - ublas matrix_column : 4.63 sec
|
|
* - naive implementation : 4.70 sec
|
|
*
|
|
* reps = 2000000
|
|
* - rev 2100 : 45.11 sec
|
|
*/
|
|
double timeColumn(size_t reps) {
|
|
// create a matrix
|
|
size_t m = 100; size_t n = 100;
|
|
Matrix M(m, n);
|
|
for (size_t i=0; i<m; ++i)
|
|
for (size_t j=0; j<n; ++j)
|
|
M(i,j) = 2*i+j;
|
|
|
|
// extract a column
|
|
double elapsed;
|
|
Vector result;
|
|
{
|
|
boost::timer t;
|
|
for (size_t i=0; i<reps; ++i)
|
|
for (size_t j = 0; j<n; ++j)
|
|
//result = ublas::matrix_column<Matrix>(M, j);
|
|
result = column(M, j);
|
|
elapsed = t.elapsed();
|
|
}
|
|
return elapsed;
|
|
}
|
|
|
|
/*
|
|
* Results
|
|
* Alex's machine
|
|
*
|
|
* Runs at reps = 500000
|
|
* Baseline (no householder, just matrix copy) : 0.05 sec
|
|
* Initial : 8.20 sec
|
|
* All in one function : 7.89 sec
|
|
* Replace householder update with GSL, ATLAS : 0.92 sec
|
|
*
|
|
* Runs at reps = 2000000
|
|
* Baseline (GSL/ATLAS householder update) : 3.61 sec
|
|
*
|
|
* Runs at reps = 5000000
|
|
* Baseline : 8.76 sec
|
|
* GSL/Atlas version of updateAb : 9.03 sec // Why does this have an effect?
|
|
* Inlining house() : 6.33 sec
|
|
* Inlining householder_update [GSL] : 6.15 sec
|
|
* Rev 2100 : 5.75 sec
|
|
*
|
|
*/
|
|
double timeHouseholder(size_t reps) {
|
|
// create a matrix
|
|
Matrix Abase = (Matrix(4, 7) <<
|
|
-5, 0, 5, 0, 0, 0, -1,
|
|
00, -5, 0, 5, 0, 0, 1.5,
|
|
10, 0, 0, 0,-10, 0, 2,
|
|
00, 10, 0, 0, 0,-10, -1).finished();
|
|
|
|
// perform timing
|
|
double elapsed;
|
|
{
|
|
boost::timer t;
|
|
for (size_t i=0; i<reps; ++i) {
|
|
Matrix A = Abase;
|
|
householder_(A,3);
|
|
}
|
|
elapsed = t.elapsed();
|
|
}
|
|
return elapsed;
|
|
}
|
|
/**
|
|
* Results: (Alex's machine)
|
|
* reps: 200000
|
|
*
|
|
* Initial (boost matrix proxies) - 12.08
|
|
* Direct pointer method - 4.62
|
|
*/
|
|
double timeMatrixInsert(size_t reps) {
|
|
// create a matrix
|
|
Matrix bigBase = zeros(100, 100);
|
|
Matrix small = eye(5,5);
|
|
|
|
// perform timing
|
|
double elapsed;
|
|
{
|
|
boost::timer t;
|
|
Matrix big = bigBase;
|
|
for (size_t rep=0; rep<reps; ++rep)
|
|
for (size_t i=0; i<100; i += 5)
|
|
for (size_t j=0; j<100; j += 5)
|
|
insertSub(big, small, i,j);
|
|
elapsed = t.elapsed();
|
|
}
|
|
return elapsed;
|
|
}
|
|
|
|
int main(int argc, char ** argv) {
|
|
|
|
// Time collect()
|
|
cout << "Starting Matrix::collect() Timing" << endl;
|
|
//size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
|
|
size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
|
|
double collect_time1 = timeCollect(p, m, n, false, reps);
|
|
double collect_time2 = timeCollect(p, m, n, true, reps);
|
|
cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
|
|
cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl;
|
|
|
|
// Time vector_scale_column
|
|
cout << "Starting Matrix::vector_scale(column) Timing" << endl;
|
|
size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000;
|
|
double vsColumn_time = timeVScaleColumn(m1, n1, reps1);
|
|
cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl;
|
|
|
|
// Time vector_scale_row
|
|
cout << "Starting Matrix::vector_scale(row) Timing" << endl;
|
|
double vsRow_time = timeVScaleRow(m1, n1, reps1);
|
|
cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl;
|
|
|
|
// Time column() NOTE: using the ublas version
|
|
cout << "Starting column() Timing" << endl;
|
|
size_t reps2 = 2000000;
|
|
double column_time = timeColumn(reps2);
|
|
cout << "Time: " << column_time << " sec" << endl;
|
|
|
|
// Time householder_ function
|
|
cout << "Starting householder_() Timing" << endl;
|
|
size_t reps_house = 5000000;
|
|
double house_time = timeHouseholder(reps_house);
|
|
cout << "Elapsed time for householder_() : " << house_time << " sec" << endl;
|
|
|
|
// Time matrix insertion
|
|
cout << "Starting insertSub() Timing" << endl;
|
|
size_t reps_insert = 200000;
|
|
double insert_time = timeMatrixInsert(reps_insert);
|
|
cout << "Elapsed time for insertSub() : " << insert_time << " sec" << endl;
|
|
|
|
return 0;
|
|
}
|