diff --git a/cpp/Matrix.cpp b/cpp/Matrix.cpp index ade536bff..3cb7f72c0 100644 --- a/cpp/Matrix.cpp +++ b/cpp/Matrix.cpp @@ -156,6 +156,19 @@ void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e) { #endif } +/* ************************************************************************* */ +void multiplyAdd(const Matrix& A, const Vector& x, Vector& e) { + // ublas e += prod(A,x) is terribly slow + size_t m = A.size1(), n = A.size2(); + double * ei = e.data().begin(); + const double * aij = A.data().begin(); + for (int i = 0; i < m; i++, ei++) { + const double * xj = x.data().begin(); + for (int j = 0; j < n; j++, aij++, xj++) + (*ei) += (*aij) * (*xj); + } +} + /* ************************************************************************* */ Vector operator^(const Matrix& A, const Vector & v) { if (A.size1()!=v.size()) throw std::invalid_argument( @@ -186,6 +199,20 @@ void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector #endif } +/* ************************************************************************* */ +void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x) { + // ublas x += prod(trans(A),e) is terribly slow + // TODO: use BLAS + size_t m = A.size1(), n = A.size2(); + double * xj = x.data().begin(); + for (int j = 0; j < n; j++,xj++) { + const double * ei = e.data().begin(); + const double * aij = A.data().begin() + j; + for (int i = 0; i < m; i++, aij+=n, ei++) + (*xj) += (*aij) * (*ei); + } +} + /* ************************************************************************* */ void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, SubVector x) { // ublas x += prod(trans(A),e) is terribly slow diff --git a/cpp/Matrix.h b/cpp/Matrix.h index 56764650f..ce796cf77 100644 --- a/cpp/Matrix.h +++ b/cpp/Matrix.h @@ -91,6 +91,11 @@ inline Vector operator*(const Matrix& A, const Vector & v) { return prod(A,v);} */ void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e); +/** + * BLAS Level-2 style e <- e + A*x + */ +void multiplyAdd(const Matrix& A, const Vector& x, Vector& e); + /** * overload ^ for trans(A)*v * We transpose the vectors for speed. @@ -102,6 +107,11 @@ Vector operator^(const Matrix& A, const Vector & v); */ void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector& x); +/** + * BLAS Level-2 style x <- x + A'*e + */ +void transposeMultiplyAdd(const Matrix& A, const Vector& e, Vector& x); + /** * BLAS Level-2 style x <- x + alpha*A'*e */