Faster non-GSL versions of BLAS 2-style calls
parent
efe51c8419
commit
5554f6fc7e
|
|
@ -145,11 +145,13 @@ void multiplyAdd(double alpha, const Matrix& A, const Vector& x, Vector& e) {
|
||||||
gsl_blas_dgemv (CblasNoTrans, alpha, &(Ag.matrix), &(xg.vector), 1.0, &(eg.vector));
|
gsl_blas_dgemv (CblasNoTrans, alpha, &(Ag.matrix), &(xg.vector), 1.0, &(eg.vector));
|
||||||
#else
|
#else
|
||||||
// ublas e += prod(A,x) is terribly slow
|
// ublas e += prod(A,x) is terribly slow
|
||||||
for (int i = 0; i < A.size1(); i++) {
|
size_t m = A.size1(), n = A.size2();
|
||||||
double& ei = e(i);
|
double * ei = e.data().begin();
|
||||||
for (int j = 0; j < A.size2(); j++) {
|
const double * aij = A.data().begin();
|
||||||
ei += alpha * A(i, j) * x(j);
|
for (int i = 0; i < m; i++, ei++) {
|
||||||
}
|
const double * xj = x.data().begin();
|
||||||
|
for (int j = 0; j < n; j++, aij++, xj++)
|
||||||
|
(*ei) += alpha * (*aij) * (*xj);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -173,11 +175,13 @@ void transposeMultiplyAdd(double alpha, const Matrix& A, const Vector& e, Vector
|
||||||
#else
|
#else
|
||||||
// ublas x += prod(trans(A),e) is terribly slow
|
// ublas x += prod(trans(A),e) is terribly slow
|
||||||
// TODO: use BLAS
|
// TODO: use BLAS
|
||||||
for (int j = 0; j < A.size2(); j++) {
|
size_t m = A.size1(), n = A.size2();
|
||||||
double& xj = x(j);
|
double * xj = x.data().begin();
|
||||||
for (int i = 0; i < A.size1(); i++) {
|
for (int j = 0; j < n; j++,xj++) {
|
||||||
xj += alpha * A(i, j) * e(i);
|
const double * ei = e.data().begin();
|
||||||
}
|
const double * aij = A.data().begin() + j;
|
||||||
|
for (int i = 0; i < m; i++, aij+=n, ei++)
|
||||||
|
(*xj) += alpha * (*aij) * (*ei);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue