Implemented a version of collect() using matrix_range, but it performed worse than the memcpy version, so it is left commented out. New version of the column-scaling vector_scale that uses pointer arithmetic, for an approximately 20% improvement.

release/4.3a0
Alex Cunningham 2010-01-20 02:28:25 +00:00
parent 820ae5682d
commit 03ec3e3e62
2 changed files with 23 additions and 3 deletions

View File

@ -9,6 +9,7 @@
#include <iomanip>
#include <list>
#include <boost/numeric/ublas/matrix_proxy.hpp>
#include <boost/foreach.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/io.hpp>
@ -504,6 +505,19 @@ Matrix collect(const std::vector<const Matrix *>& matrices, size_t m, size_t n)
dimA1 = M->size1(); // TODO: should check if all the same !
dimA2 += M->size2();
}
// matrix_range version
// Result: slower
// Matrix A(dimA1, dimA2);
// size_t hindex = 0;
// BOOST_FOREACH(const Matrix* M, matrices) {
// ublas::matrix_range<Matrix> mr(A, ublas::range(0, dimA1),
// ublas::range(hindex, hindex+M->size2()));
// mr = *M;
// hindex += M->size2();
// }
// memcpy version
Matrix A(dimA1, dimA2);
double * Aptr = A.data().begin();
size_t hindex = 0;
@ -561,9 +575,14 @@ Matrix vector_scale(const Vector& v, const Matrix& A) {
// column scaling
// Returns a copy of A in which every entry of column j has been multiplied
// by v(j). Exactly A.size2() entries of v are read; the length of v is not
// checked here, so the caller must supply at least that many.
Matrix vector_scale(const Matrix& A, const Vector& v) {
Matrix M(A);
const size_t m = A.size1();
const size_t n = A.size2();
const double * vptr = v.data().begin();
// Walk M's underlying storage once with a single running pointer instead of
// going through operator() or recomputing the offset for every element.
// NOTE(review): like the i*n + j indexing this replaces, this assumes the
// storage is dense and row-major (n consecutive doubles per row) - confirm
// against the Matrix typedef.
double * Mptr = M.data().begin();
for (size_t i=0; i<m; ++i) { // loop over rows
for (size_t j=0; j<n; ++j, ++Mptr) { // loop over columns
// scale each entry exactly once by its column's factor
*Mptr *= vptr[j];
}
}
return M;
}

View File

@ -58,6 +58,7 @@ double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) {
* Results:
* Alex's Machine:
* - Original: 0.60 sec (x1000)
* - 1st Rev : 0.49 sec (x1000)
*/
double timeVScaleColumn(size_t m, size_t n, size_t reps) {
// make a matrix to scale