update the packaged Eigen to 3.3.9

release/4.3a0
Varun Agrawal 2021-11-29 14:06:01 -05:00
parent 58d4c05ff9
commit 2f4218d046
141 changed files with 1974 additions and 1222 deletions

View File

@ -1,4 +1,3 @@
syntax: glob
qrc_*cxx
*.orig
*.pyc
@ -13,7 +12,7 @@ core
core.*
*.bak
*~
build*
*build*
*.moc.*
*.moc
ui_*
@ -28,7 +27,11 @@ activity.png
*.rej
log
patch
*.patch
a
a.*
lapack/testing
lapack/reference
.*project
.settings
Makefile

View File

@ -1,33 +0,0 @@
2db9468678c6480c9633b6272ff0e3599d1e11a3 2.0-beta3
375224817dce669b6fa31d920d4c895a63fabf32 2.0-beta1
3b8120f077865e2a072e10f5be33e1d942b83a06 2.0-rc1
19dfc0e7666bcee26f7a49eb42f39a0280a3485e 2.0-beta5
7a7d8a9526f003ffa2430dfb0c2c535b5add3023 2.0-beta4
7d14ad088ac23769c349518762704f0257f6a39b 2.0.1
b9d48561579fd7d4c05b2aa42235dc9de6484bf2 2.0-beta6
e17630a40408243cb1a51ad0fe3a99beb75b7450 before-hg-migration
eda654d4cda2210ce80719addcf854773e6dec5a 2.0.0
ee9a7c468a9e73fab12f38f02bac24b07f29ed71 2.0-beta2
d49097c25d8049e730c254a2fed725a240ce4858 after-hg-migration
655348878731bcb5d9bbe0854077b052e75e5237 actual-start-from-scratch
12a658962d4e6dfdc9a1c350fe7b69e36e70675c 3.0-beta1
5c4180ad827b3f869b13b1d82f5a6ce617d6fcee 3.0-beta2
7ae24ca6f3891d5ac58ddc7db60ad413c8d6ec35 3.0-beta3
c40708b9088d622567fecc9208ad4a426621d364 3.0-beta4
b6456624eae74f49ae8683d8e7b2882a2ca0342a 3.0-rc1
a810d5dbab47acfe65b3350236efdd98f67d4d8a 3.1.0-alpha1
304c88ca3affc16dd0b008b1104873986edd77af 3.1.0-alpha2
920fc730b5930daae0a6dbe296d60ce2e3808215 3.1.0-beta1
8383e883ebcc6f14695ff0b5e20bb631abab43fb 3.1.0-rc1
bf4cb8c934fa3a79f45f1e629610f0225e93e493 3.1.0-rc2
da195914abcc1d739027cbee7c52077aab30b336 3.2-beta1
a8e0d153fc5e239ef8b06e3665f1f9e8cb8d49c8 before-evaluators
09a8e21866106b49c5dec1d6d543e5794e82efa0 3.3-alpha1
ce5a455b34c0a0ac3545a1497cb4a16c38ed90e8 3.3-beta1
69d418c0699907bcd0bf9e0b3ba0a112ed091d85 3.3-beta2
bef509908b9da05d0d07ffc0da105e2c8c6d3996 3.3-rc1
04ab5fa4b241754afcf631117572276444c67239 3.3-rc2
26667be4f70baf4f0d39e96f330714c87b399090 3.3.0
f562a193118d4f40514e2f4a0ace6e974926ef06 3.3.1
da9b4e14c2550e0d11078a3c39e6d56eba9905df 3.3.2
67e894c6cd8f5f1f604b27d37ed47fdf012674ff 3.3.3

View File

@ -391,22 +391,27 @@ endif()
if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR)
set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR}
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed")
CACHE STRING "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed")
else()
set(INCLUDE_INSTALL_DIR
"${CMAKE_INSTALL_INCLUDEDIR}/eigen3"
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed"
CACHE STRING "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed"
)
endif()
set(CMAKEPACKAGE_INSTALL_DIR
"${CMAKE_INSTALL_DATADIR}/eigen3/cmake"
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
CACHE STRING "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
)
set(PKGCONFIG_INSTALL_DIR
"${CMAKE_INSTALL_DATADIR}/pkgconfig"
CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed"
CACHE STRING "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed"
)
foreach(var INCLUDE_INSTALL_DIR CMAKEPACKAGE_INSTALL_DIR PKGCONFIG_INSTALL_DIR)
if(IS_ABSOLUTE "${${var}}")
message(FATAL_ERROR "${var} must be relative to CMAKE_PREFIX_PATH. Got: ${${var}}")
endif()
endforeach()
# similar to set_target_properties but append the property instead of overwriting it
macro(ei_add_target_property target prop value)

View File

@ -4,10 +4,10 @@
## # The following are required to use Dart and the CDash dashboard
## ENABLE_TESTING()
## INCLUDE(CTest)
set(CTEST_PROJECT_NAME "Eigen 3.3")
set(CTEST_PROJECT_NAME "Eigen")
set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "manao.inria.fr")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen+3.3")
set(CTEST_DROP_SITE "my.cdash.org")
set(CTEST_DROP_LOCATION "/submit.php?project=Eigen")
set(CTEST_DROP_SITE_CDASH TRUE)

View File

@ -279,7 +279,10 @@
#include <cmath>
#include <cassert>
#include <functional>
#include <iosfwd>
#include <sstream>
#ifndef EIGEN_NO_IO
#include <iosfwd>
#endif
#include <cstring>
#include <string>
#include <limits>
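The new EIGEN_NO_IO guard makes the <iosfwd> include, and with it the stream-based IO paths, optional. A minimal usage sketch, assuming the macro behaves as the guard suggests (define it before including any Eigen header):

#define EIGEN_NO_IO
#include <Eigen/Core>

int main() {
  Eigen::Matrix2f m;  // core functionality remains available
  m.setZero();        // only the stream-based IO paths are compiled out
  return 0;
}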
@ -375,7 +378,9 @@ using std::ptrdiff_t;
#if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/AVX/PacketMath.h"
#include "src/Core/arch/AVX/MathFunctions.h"
#include "src/Core/arch/AVX512/PacketMath.h"
#include "src/Core/arch/AVX512/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_AVX

View File

@ -10,14 +10,14 @@
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky"
#include "Jacobi"
#include "Householder"
#include "LU"
#include "Geometry"
#include "src/Core/util/DisableStupidWarnings.h"
/** \defgroup Eigenvalues_Module Eigenvalues module
*
*

View File

@ -10,12 +10,12 @@
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include "SVD"
#include "LU"
#include <limits>
#include "src/Core/util/DisableStupidWarnings.h"
/** \defgroup Geometry_Module Geometry module
*
* This module provides support for:

View File

@ -10,12 +10,12 @@
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
#include "Cholesky"
#include "Jacobi"
#include "Householder"
#include "src/Core/util/DisableStupidWarnings.h"
/** \defgroup QR_Module QR module
*
*

View File

@ -28,7 +28,6 @@
*
*/
#include "OrderingMethods"
#include "src/SparseCore/SparseColEtree.h"
#include "src/SparseQR/SparseQR.h"

View File

@ -153,8 +153,8 @@ template<typename Derived> class ArrayBase
// inline void evalTo(Dest& dst) const { dst = matrix(); }
protected:
EIGEN_DEVICE_FUNC
ArrayBase() : Base() {}
EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase)
private:
explicit ArrayBase(Index);
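Several base classes in this commit swap hand-written empty constructors for these two macros. A hedged sketch of what they presumably expand to under C++11; the exact expansion is an assumption, not shown in this diff:

// Hypothetical expansions, for illustration only:
#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) \
  EIGEN_DEVICE_FUNC CLASS(const CLASS&) = default;
#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CLASS) \
  EIGEN_DEVICE_FUNC CLASS() = default; \
  EIGEN_DEVICE_FUNC ~CLASS() = default;

Defaulted members keep these bases trivially copyable where possible, which user-written empty bodies would prevent.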

View File

@ -121,6 +121,8 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
{
return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
protected:
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
};
} // end namespace Eigen

View File

@ -40,7 +40,7 @@ static inline void check_DenseIndex_is_signed() {
*/
template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
: public DenseCoeffsBase<Derived>
: public DenseCoeffsBase<Derived, internal::accessors_level<Derived>::value>
#else
: public DenseCoeffsBase<Derived,DirectWriteAccessors>
#endif // not EIGEN_PARSED_BY_DOXYGEN
@ -71,7 +71,7 @@ template<typename Derived> class DenseBase
typedef Scalar value_type;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DenseCoeffsBase<Derived> Base;
typedef DenseCoeffsBase<Derived, internal::accessors_level<Derived>::value> Base;
using Base::derived;
using Base::const_cast_derived;
@ -587,11 +587,12 @@ template<typename Derived> class DenseBase
}
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
/** Default constructor. Do nothing. */
EIGEN_DEVICE_FUNC DenseBase()
{
/* Just checks for self-consistency of the flags.
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
*/
#ifdef EIGEN_INTERNAL_DEBUGGING
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))

View File

@ -404,7 +404,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
if(size != m_rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
if (size)
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
@ -479,7 +479,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
if(size != _Rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
if (size)
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
@ -553,7 +553,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
if(size != m_rows*_Cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
if (size)
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
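The same size>0 guard recurs in all three DenseStorage specializations. A standalone sketch of the pattern (not Eigen's code), showing why the signed comparison helps:

#include <cstdlib>

typedef long Index;  // stand-in for Eigen's signed Index type

double* realloc_storage(double* data, Index old_size, Index new_size) {
  if (new_size == old_size) return data;
  std::free(data);
  if (new_size > 0)  // '>0' rather than '!=0': the optimizer may now assume
                     // the allocation count is non-negative
    return static_cast<double*>(std::malloc(sizeof(double) * new_size));
  return 0;          // empty storage
}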

View File

@ -351,10 +351,7 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet&
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
{
// FIXME: uncomment the following in case we drop the internal imag and real functions.
// using std::imag;
// using std::real;
return Packet(imag(a),real(a));
return Packet(a.imag(),a.real());
}
/**************************
@ -524,10 +521,10 @@ inline void palign(PacketType& first, const PacketType& second)
#ifndef __CUDACC__
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
{ return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
{ return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
#endif
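Replacing the unqualified real()/imag() calls with member calls avoids relying on argument-dependent lookup. A scalar reference for the two operations touched here:

#include <complex>

// pcplxflip: real and imaginary parts swapped.
template <typename T>
std::complex<T> pcplxflip_ref(const std::complex<T>& a) {
  return std::complex<T>(a.imag(), a.real());
}

// pmul: (ar + i*ai) * (br + i*bi) = (ar*br - ai*bi) + i*(ai*br + ar*bi)
template <typename T>
std::complex<T> pmul_ref(const std::complex<T>& a, const std::complex<T>& b) {
  return std::complex<T>(a.real()*b.real() - a.imag()*b.imag(),
                         a.imag()*b.real() + a.real()*b.imag());
}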

View File

@ -182,6 +182,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
#endif
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
template<typename T>
EIGEN_DEVICE_FUNC
@ -294,6 +296,9 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
// In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
// see bugs 821 and 920.
using ReadOnlyMapBase::Base::operator=;
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
};
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS

View File

@ -287,7 +287,7 @@ struct abs2_impl_default<Scalar, true> // IsComplex
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
return real(x)*real(x) + imag(x)*imag(x);
return x.real()*x.real() + x.imag()*x.imag();
}
};
@ -313,14 +313,17 @@ struct abs2_retval
****************************************************************************/
template<typename Scalar, bool IsComplex>
struct norm1_default_impl
struct norm1_default_impl;
template<typename Scalar>
struct norm1_default_impl<Scalar,true>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
EIGEN_USING_STD_MATH(abs);
return abs(real(x)) + abs(imag(x));
return abs(x.real()) + abs(x.imag());
}
};
@ -662,8 +665,8 @@ struct random_default_impl<Scalar, true, false>
{
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return Scalar(random(real(x), real(y)),
random(imag(x), imag(y)));
return Scalar(random(x.real(), y.real()),
random(x.imag(), y.imag()));
}
static inline Scalar run()
{
@ -916,6 +919,9 @@ inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
}
EIGEN_DEVICE_FUNC
inline bool abs2(bool x) { return x; }
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
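For complex scalars, the two helpers touched here compute the squared magnitude and the L1 norm. A minimal scalar reference:

#include <cmath>
#include <complex>

template <typename T>
T abs2_ref(const std::complex<T>& x) {   // |x|^2 = re^2 + im^2
  return x.real()*x.real() + x.imag()*x.imag();
}

template <typename T>
T norm1_ref(const std::complex<T>& x) {  // |re| + |im|
  using std::abs;
  return abs(x.real()) + abs(x.imag());
}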

View File

@ -464,7 +464,8 @@ template<typename Derived> class MatrixBase
EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
protected:
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MatrixBase)
private:
EIGEN_DEVICE_FUNC explicit MatrixBase(int);

View File

@ -87,17 +87,6 @@ class PermutationBase : public EigenBase<Derived>
return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Derived& operator=(const PermutationBase& other)
{
indices() = other.indices();
return derived();
}
#endif
/** \returns the number of rows */
inline Index rows() const { return Index(indices().size()); }
@ -333,12 +322,6 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the indices. The indices
* array has the meaning that the permutations sends each integer i to indices[i].
*
@ -373,17 +356,6 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
return Base::operator=(tr.derived());
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
PermutationMatrix& operator=(const PermutationMatrix& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }
/** \returns a reference to the stored array representing the permutation. */

View File

@ -737,8 +737,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
bool(NumTraits<T1>::IsInteger),
const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value;
const bool t1_is_integer_alike = internal::is_valid_index_type<T1>::value;
EIGEN_STATIC_ASSERT(t0_is_integer_alike &&
t1_is_integer_alike,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(rows,cols);
}
@ -773,9 +775,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
&& ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
{
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
const bool is_integer = NumTraits<T>::IsInteger;
EIGEN_UNUSED_VARIABLE(is_integer);
EIGEN_STATIC_ASSERT(is_integer,
const bool is_integer_alike = internal::is_valid_index_type<T>::value;
EIGEN_UNUSED_VARIABLE(is_integer_alike);
EIGEN_STATIC_ASSERT(is_integer_alike,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(size);
}
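The static asserts now accept any "integer-alike" type rather than only those with NumTraits<T>::IsInteger set. A hypothetical stand-in for internal::is_valid_index_type, whose real definition is not shown in this diff:

#include <type_traits>

// Hypothetical: integral types are valid sizes, floating-point types are
// not, so e.g. MatrixXf m(2.0f, 3.0f) keeps failing at compile time.
template <typename T>
struct is_valid_index_type_sketch
    : std::integral_constant<bool, std::is_integral<T>::value> {};

static_assert(is_valid_index_type_sketch<int>::value, "int is a valid size");
static_assert(is_valid_index_type_sketch<long long>::value, "so is long long");
static_assert(!is_valid_index_type_sketch<float>::value, "float is not");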

View File

@ -411,6 +411,32 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
}
// Catch "dst {,+,-}= (s*A)*B" and evaluate it lazily by moving out the scalar factor:
// dst {,+,-}= s * (A.lazyProduct(B))
// This is a huge benefit for heap-allocated matrix types as it saves one costly allocation.
// For them, this strategy is also faster than simply bypassing the heap allocation through
// stack allocation.
// For fixed-size matrices this is less obvious: it is sometimes x2 faster, but sometimes x3 slower,
// and the behavior also depends a lot on the compiler... so let's be conservative and enable this for dynamic sizes only,
// that is, when coming from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
template<typename Dst, typename Scalar1, typename Scalar2, typename Plain1, typename Xpr2, typename Func>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void eval_dynamic(Dst& dst, const CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
{
call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
}
// Here, we always have LhsT==Lhs, but we need to make it a template type to make the above
// overload more specialized.
template<typename Dst, typename LhsT, typename Func>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func)
{
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
}
// template<typename Dst>
// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
// { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
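At the expression level, the new overload performs the rewrite sketched below (MatrixXd chosen here for illustration):

#include <Eigen/Dense>

void product_rewrite(Eigen::MatrixXd& dst, const Eigen::MatrixXd& A,
                     const Eigen::MatrixXd& B, double s) {
  // dst = (s*A)*B is now evaluated as if written like this, with the
  // scalar factor moved out of the lazy product, saving one temporary:
  dst.noalias() = s * A.lazyProduct(B);
}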

View File

@ -28,12 +28,13 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
template<typename Derived> struct match {
enum {
IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime,
HasDirectAccess = internal::has_direct_access<Derived>::ret,
StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
StorageOrderMatch = IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
|| int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
OuterStrideMatch = Derived::IsVectorAtCompileTime
OuterStrideMatch = IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
// NOTE, this indirection of evaluator<Derived>::Alignment is needed
// to workaround a very strange bug in MSVC related to the instantiation

View File

@ -19,7 +19,7 @@ namespace internal {
template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
struct triangular_solve_vector;
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder, int OtherInnerStride>
struct triangular_solve_matrix;
// small helper struct extracting some traits on the underlying solver operation
@ -98,8 +98,8 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor, Rhs::InnerStrideAtCompileTime>
::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.innerStride(), rhs.outerStride(), blocking);
}
};
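Threading the inner stride through triangular_solve_matrix lets the blocked solver handle right-hand sides whose elements are not contiguous in memory. A usage sketch, assuming such strided views are accepted once this change is in place:

#include <Eigen/Dense>

void solve_strided(const Eigen::MatrixXd& L, double* data, Eigen::Index n) {
  // View every other element of 'data' as the right-hand side.
  Eigen::Map<Eigen::VectorXd, 0, Eigen::InnerStride<2> > b(data, n);
  L.triangularView<Eigen::Lower>().solveInPlace(b);  // solves L*x = b in place
}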

View File

@ -146,6 +146,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
{
return derived().nestedExpression().coeffRef(index);
}
protected:
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TransposeImpl)
};
/** \returns an expression of the transpose of *this.

View File

@ -34,17 +34,6 @@ class TranspositionsBase
return derived();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Derived& operator=(const TranspositionsBase& other)
{
indices() = other.indices();
return derived();
}
#endif
/** \returns the number of transpositions */
Index size() const { return indices().size(); }
/** \returns the number of rows of the equivalent permutation matrix */
@ -171,12 +160,6 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
inline Transpositions(const TranspositionsBase<OtherDerived>& other)
: m_indices(other.indices()) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** Standard copy constructor. Defined only to prevent a default copy constructor
* from hiding the other templated constructor */
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
#endif
/** Generic constructor from expression of the transposition indices. */
template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
@ -189,17 +172,6 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
return Base::operator=(other);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
Transpositions& operator=(const Transpositions& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** Constructs an uninitialized permutation matrix of given size.
*/
inline Transpositions(Index size) : m_indices(size)
@ -306,17 +278,6 @@ class TranspositionsWrapper
return Base::operator=(other);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=.
*/
TranspositionsWrapper& operator=(const TranspositionsWrapper& other)
{
m_indices = other.m_indices;
return *this;
}
#endif
/** const version of indices(). */
const IndicesType& indices() const { return m_indices; }

View File

@ -217,9 +217,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{}
using Base::operator=;
TriangularView& operator=(const TriangularView &other)
{ return Base::operator=(other); }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView)
/** \copydoc EigenBase::rows() */
EIGEN_DEVICE_FUNC
@ -544,6 +542,10 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
template<typename ProductType>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(TriangularViewImpl)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TriangularViewImpl)
};
/***************************************************************************

View File

@ -29,6 +29,7 @@ namespace internal {
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
// Natural logarithm
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
@ -47,6 +48,7 @@ plog<Packet16f>(const Packet16f& _x) {
// The smallest non denormalized float number.
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(pos_inf, 0x7f800000);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
// Polynomial coefficients.
@ -64,10 +66,8 @@ plog<Packet16f>(const Packet16f& _x) {
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
// invalid_mask is set to true when x is NaN
__mmask16 invalid_mask =
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
__mmask16 iszero_mask =
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
__mmask16 invalid_mask = _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
__mmask16 iszero_mask = _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_OQ);
// Truncate input values to the minimum positive normal.
x = pmax(x, p16f_min_norm_pos);
@ -118,11 +118,18 @@ plog<Packet16f>(const Packet16f& _x) {
x = padd(x, y);
x = padd(x, y2);
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
__mmask16 pos_inf_mask = _mm512_cmp_ps_mask(_x,p16f_pos_inf,_CMP_EQ_OQ);
// Filter out invalid inputs, i.e.:
// - negative arg will be NAN,
// - 0 will be -INF,
// - +INF will be +INF.
return _mm512_mask_blend_ps(iszero_mask,
_mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
p16f_minus_inf);
_mm512_mask_blend_ps(invalid_mask,
_mm512_mask_blend_ps(pos_inf_mask,x,p16f_pos_inf),
p16f_nan),
p16f_minus_inf);
}
#endif
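A scalar sketch of the decomposition the vectorized plog implements: split x into mantissa and exponent, renormalize the mantissa into [sqrt(1/2), sqrt(2)), then recombine. std::log stands in for the Cephes polynomial used above; valid for positive, finite x:

#include <cmath>

float plog_ref(float x) {
  int e;
  float m = std::frexp(x, &e);            // x = m * 2^e, with m in [0.5, 1)
  if (m < 0.707106781f) { m += m; --e; }  // renormalize m into [sqrt(1/2), sqrt(2))
  return float(e) * 0.693147181f + std::log(m);  // log(x) = e*log(2) + log(m)
}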
// Exponential function. Works by writing "x = m*log(2) + r" where
@ -258,48 +265,39 @@ pexp<Packet8d>(const Packet8d& _x) {
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
psqrt<Packet16f>(const Packet16f& _x) {
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
__mmask16 denormal_mask = _mm512_kand(
_mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
_CMP_LT_OQ),
_mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
Packet16f neg_half = pmul(_x, p16f_minus_half);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
Packet16f x = _mm512_rsqrt14_ps(_x);
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
// Multiply the original _x by its reciprocal square root to extract the
// square root.
return pmul(_x, x);
// Flush results for denormals to zero.
return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
psqrt<Packet8d>(const Packet8d& _x) {
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));
__mmask16 denormal_mask = _mm512_kand(
_mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
_CMP_LT_OQ),
_mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
Packet8d neg_half = pmul(_x, p8d_minus_half);
Packet8d x = _mm512_rsqrt14_pd(_x);
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
// Do a second step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
// Multiply the original _x by its reciprocal square root to extract the
// square root.
return pmul(_x, x);
return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
}
#else
template <>

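A scalar sketch of the Newton refinement both psqrt overloads rely on: one iteration of y <- y*(1.5 - 0.5*a*y*y) roughly doubles the accurate bits of an rsqrt estimate, and sqrt(a) is recovered as a*y. The denormal_mask blends added above handle a == 0 and denormal inputs, for which y would be infinite and a*y would yield NaN:

float newton_sqrt_ref(float a, float y /* coarse estimate of 1/sqrt(a) */) {
  float neg_half = a * -0.5f;
  y = y * (neg_half * y * y + 1.5f);  // one Newton step (the double path does two)
  return a * y;                       // sqrt(a) = a * rsqrt(a)
}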
View File

@ -19,10 +19,10 @@ namespace internal {
#endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
#ifdef __FMA__
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
@ -54,13 +54,14 @@ template<> struct packet_traits<float> : default_packet_traits
AlignedOnScalar = 1,
size = 16,
HasHalfPacket = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3)
HasBlend = 0,
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
#ifdef EIGEN_VECTORIZE_AVX512DQ
HasLog = 1,
#endif
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasSqrt = EIGEN_FAST_MATH,
HasRsqrt = EIGEN_FAST_MATH,
#endif
HasDiv = 1
};
@ -74,8 +75,8 @@ template<> struct packet_traits<double> : default_packet_traits
AlignedOnScalar = 1,
size = 8,
HasHalfPacket = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3)
HasSqrt = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
HasSqrt = EIGEN_FAST_MATH,
HasRsqrt = EIGEN_FAST_MATH,
#endif
HasDiv = 1
@ -98,6 +99,7 @@ template <>
struct unpacket_traits<Packet16f> {
typedef float type;
typedef Packet8f half;
typedef Packet16i integer_packet;
enum { size = 16, alignment=Aligned64 };
};
template <>
@ -132,7 +134,7 @@ EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
}
template <>
EIGEN_STRONG_INLINE Packet8d pload1<Packet8d>(const double* from) {
return _mm512_broadcastsd_pd(_mm_load_pd1(from));
return _mm512_set1_pd(*from);
}
template <>
@ -158,6 +160,11 @@ EIGEN_STRONG_INLINE Packet8d padd<Packet8d>(const Packet8d& a,
const Packet8d& b) {
return _mm512_add_pd(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16i padd<Packet16i>(const Packet16i& a,
const Packet16i& b) {
return _mm512_add_epi32(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16f psub<Packet16f>(const Packet16f& a,
@ -169,6 +176,11 @@ EIGEN_STRONG_INLINE Packet8d psub<Packet8d>(const Packet8d& a,
const Packet8d& b) {
return _mm512_sub_pd(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a,
const Packet16i& b) {
return _mm512_sub_epi32(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
@ -202,6 +214,11 @@ EIGEN_STRONG_INLINE Packet8d pmul<Packet8d>(const Packet8d& a,
const Packet8d& b) {
return _mm512_mul_pd(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16i pmul<Packet16i>(const Packet16i& a,
const Packet16i& b) {
return _mm512_mul_epi32(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a,
@ -214,7 +231,7 @@ EIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,
return _mm512_div_pd(a, b);
}
#ifdef __FMA__
#ifdef EIGEN_VECTORIZE_FMA
template <>
EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,
const Packet16f& c) {
@ -230,23 +247,73 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,
template <>
EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,
const Packet16f& b) {
return _mm512_min_ps(a, b);
// Arguments are reversed to match NaN propagation behavior of std::min.
return _mm512_min_ps(b, a);
}
template <>
EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a,
const Packet8d& b) {
return _mm512_min_pd(a, b);
// Arguments are reversed to match NaN propagation behavior of std::min.
return _mm512_min_pd(b, a);
}
template <>
EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a,
const Packet16f& b) {
return _mm512_max_ps(a, b);
// Arguments are reversed to match NaN propagation behavior of std::max.
return _mm512_max_ps(b, a);
}
template <>
EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
const Packet8d& b) {
return _mm512_max_pd(a, b);
// Arguments are reversed to match NaN propagation behavior of std::max.
return _mm512_max_pd(b, a);
}
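Why the operand swap matters: std::min(a, b) is defined as (b < a) ? b : a, so it returns a when either input is NaN, whereas the x86 min/max instructions return their second operand on unordered input. A scalar model:

// MINPS-style semantics: the second operand comes back when the
// comparison involves a NaN.
inline float hw_min(float x, float y) { return (x < y) ? x : y; }

// With the operands reversed, the packet op agrees with std::min(a, b):
// both return 'a' when either input is NaN.
inline float pmin_ref(float a, float b) { return hw_min(b, a); }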
#ifdef EIGEN_VECTORIZE_AVX512DQ
template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I_); }
template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) { return _mm512_extractf64x2_pd(x,I_); }
EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }
#else
// AVX512F does not define _mm512_extractf32x8_ps to extract __m256 from __m512
template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {
return _mm256_castsi256_ps(_mm512_extracti64x4_epi64( _mm512_castps_si512(x),I_));
}
// AVX512F does not define _mm512_extractf64x2_pd to extract __m128 from __m512
template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) {
return _mm_castsi128_pd(_mm512_extracti32x4_epi32( _mm512_castpd_si512(x),I_));
}
EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),
_mm256_castps_si256(b),1));
}
#endif
// Helper function for bit packing snippet of low precision comparison.
// It packs the flags from 32x16 to 16x16.
EIGEN_STRONG_INLINE __m256i Pack32To16(Packet16f rf) {
// Split data into small pieces and handle with AVX instructions
// to guarantee internal order of vector.
// Operation:
// dst[15:0] := Saturate16(rf[31:0])
// dst[31:16] := Saturate16(rf[63:32])
// ...
// dst[255:240] := Saturate16(rf[255:224])
__m256i lo = _mm256_castps_si256(extract256<0>(rf));
__m256i hi = _mm256_castps_si256(extract256<1>(rf));
__m128i result_lo = _mm_packs_epi32(_mm256_extractf128_si256(lo, 0),
_mm256_extractf128_si256(lo, 1));
__m128i result_hi = _mm_packs_epi32(_mm256_extractf128_si256(hi, 0),
_mm256_extractf128_si256(hi, 1));
return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1);
}
template <>
EIGEN_STRONG_INLINE Packet16i pand<Packet16i>(const Packet16i& a,
const Packet16i& b) {
return _mm512_and_si512(a,b);
}
template <>
@ -255,24 +322,7 @@ EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_and_ps(a, b);
#else
Packet16f res = _mm512_undefined_ps();
Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
res = _mm512_insertf32x4(res, _mm_and_ps(lane0_a, lane0_b), 0);
Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
res = _mm512_insertf32x4(res, _mm_and_ps(lane1_a, lane1_b), 1);
Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
res = _mm512_insertf32x4(res, _mm_and_ps(lane2_a, lane2_b), 2);
Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
res = _mm512_insertf32x4(res, _mm_and_ps(lane3_a, lane3_b), 3);
return res;
return _mm512_castsi512_ps(pand(_mm512_castps_si512(a),_mm512_castps_si512(b)));
#endif
}
template <>
@ -288,35 +338,21 @@ EIGEN_STRONG_INLINE Packet8d pand<Packet8d>(const Packet8d& a,
Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
res = _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);
return res;
return _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);
#endif
}
template <>
EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a,
const Packet16f& b) {
EIGEN_STRONG_INLINE Packet16i por<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_or_si512(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_or_ps(a, b);
#else
Packet16f res = _mm512_undefined_ps();
Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
res = _mm512_insertf32x4(res, _mm_or_ps(lane0_a, lane0_b), 0);
Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
res = _mm512_insertf32x4(res, _mm_or_ps(lane1_a, lane1_b), 1);
Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
res = _mm512_insertf32x4(res, _mm_or_ps(lane2_a, lane2_b), 2);
Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
res = _mm512_insertf32x4(res, _mm_or_ps(lane3_a, lane3_b), 3);
return res;
return _mm512_castsi512_ps(por(_mm512_castps_si512(a),_mm512_castps_si512(b)));
#endif
}
@ -326,109 +362,67 @@ EIGEN_STRONG_INLINE Packet8d por<Packet8d>(const Packet8d& a,
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_or_pd(a, b);
#else
Packet8d res = _mm512_undefined_pd();
Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
res = _mm512_insertf64x4(res, _mm256_or_pd(lane0_a, lane0_b), 0);
Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
res = _mm512_insertf64x4(res, _mm256_or_pd(lane1_a, lane1_b), 1);
return res;
return _mm512_castsi512_pd(por(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));
#endif
}
template <>
EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a,
const Packet16f& b) {
EIGEN_STRONG_INLINE Packet16i pxor<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_xor_si512(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_xor_ps(a, b);
#else
Packet16f res = _mm512_undefined_ps();
Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
res = _mm512_insertf32x4(res, _mm_xor_ps(lane0_a, lane0_b), 0);
Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
res = _mm512_insertf32x4(res, _mm_xor_ps(lane1_a, lane1_b), 1);
Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
res = _mm512_insertf32x4(res, _mm_xor_ps(lane2_a, lane2_b), 2);
Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
res = _mm512_insertf32x4(res, _mm_xor_ps(lane3_a, lane3_b), 3);
return res;
return _mm512_castsi512_ps(pxor(_mm512_castps_si512(a),_mm512_castps_si512(b)));
#endif
}
template <>
EIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a,
const Packet8d& b) {
EIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a, const Packet8d& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_xor_pd(a, b);
#else
Packet8d res = _mm512_undefined_pd();
Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
res = _mm512_insertf64x4(res, _mm256_xor_pd(lane0_a, lane0_b), 0);
Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
res = _mm512_insertf64x4(res, _mm256_xor_pd(lane1_a, lane1_b), 1);
return res;
return _mm512_castsi512_pd(pxor(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));
#endif
}
template <>
EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a,
const Packet16f& b) {
EIGEN_STRONG_INLINE Packet16i pandnot<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_andnot_si512(b, a);
}
template <>
EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_andnot_ps(a, b);
return _mm512_andnot_ps(b, a);
#else
Packet16f res = _mm512_undefined_ps();
Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
res = _mm512_insertf32x4(res, _mm_andnot_ps(lane0_a, lane0_b), 0);
Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
res = _mm512_insertf32x4(res, _mm_andnot_ps(lane1_a, lane1_b), 1);
Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
res = _mm512_insertf32x4(res, _mm_andnot_ps(lane2_a, lane2_b), 2);
Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
res = _mm512_insertf32x4(res, _mm_andnot_ps(lane3_a, lane3_b), 3);
return res;
return _mm512_castsi512_ps(pandnot(_mm512_castps_si512(a),_mm512_castps_si512(b)));
#endif
}
template <>
EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,
const Packet8d& b) {
EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,const Packet8d& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_andnot_pd(a, b);
return _mm512_andnot_pd(b, a);
#else
Packet8d res = _mm512_undefined_pd();
Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane0_a, lane0_b), 0);
Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane1_a, lane1_b), 1);
return res;
return _mm512_castsi512_pd(pandnot(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));
#endif
}
template<int N> EIGEN_STRONG_INLINE Packet16i parithmetic_shift_right(Packet16i a) {
return _mm512_srai_epi32(a, N);
}
template<int N> EIGEN_STRONG_INLINE Packet16i plogical_shift_right(Packet16i a) {
return _mm512_srli_epi32(a, N);
}
template<int N> EIGEN_STRONG_INLINE Packet16i plogical_shift_left(Packet16i a) {
return _mm512_slli_epi32(a, N);
}
template <>
EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
@ -461,75 +455,55 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
// {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}
template <>
EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from);
// mimic an "inplace" permutation of the lower 128bits using a blend
lane0 = _mm256_blend_ps(
lane0, _mm256_castps128_ps256(_mm_permute_ps(
_mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))),
15);
// then we can perform a consistent permutation on the global register to get
// everything in shape:
lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2));
Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4));
// mimic an "inplace" permutation of the lower 128bits using a blend
lane1 = _mm256_blend_ps(
lane1, _mm256_castps128_ps256(_mm_permute_ps(
_mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))),
15);
// then we can perform a consistent permutation on the global register to get
// everything in shape:
lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2));
// an unaligned load is required here as there is no requirement
// on the alignment of input pointer 'from'
__m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
__m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));
__m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));
return pairs;
}
#ifdef EIGEN_VECTORIZE_AVX512DQ
Packet16f res = _mm512_undefined_ps();
return _mm512_insertf32x8(res, lane0, 0);
return _mm512_insertf32x8(res, lane1, 1);
return res;
#else
Packet16f res = _mm512_undefined_ps();
res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0);
res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1);
res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2);
res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3);
return res;
#endif
}
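A reference loop for the semantics documented above, with each of the 8 source floats duplicated into two adjacent lanes:

void ploaddup_ref(const float* from, float* to /* 16 entries */) {
  for (int i = 0; i < 8; ++i) {
    to[2*i]   = from[i];
    to[2*i+1] = from[i];
  }
}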
// FIXME: this does not look optimal, better load a Packet4d and shuffle...
// Loads 4 doubles from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
template <>
EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from);
lane0 = _mm256_permute_pd(lane0, 3 << 2);
Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2));
lane1 = _mm256_permute_pd(lane1, 3 << 2);
Packet8d res = _mm512_undefined_pd();
res = _mm512_insertf64x4(res, lane0, 0);
return _mm512_insertf64x4(res, lane1, 1);
__m512d x = _mm512_setzero_pd();
x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0);
x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1);
x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2);
x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3);
return x;
}
#else
template <>
EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
__m512d x = _mm512_setzero_pd();
x = _mm512_mask_broadcastsd_pd(x, 0x3<<0, _mm_load_sd(from+0));
x = _mm512_mask_broadcastsd_pd(x, 0x3<<2, _mm_load_sd(from+1));
x = _mm512_mask_broadcastsd_pd(x, 0x3<<4, _mm_load_sd(from+2));
x = _mm512_mask_broadcastsd_pd(x, 0x3<<6, _mm_load_sd(from+3));
return x;
}
#endif
// Loads 4 floats from memory and returns the packet
// {a0, a0, a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3}
template <>
EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {
Packet16f tmp = _mm512_undefined_ps();
tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from), 0);
tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 1), 1);
tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 2), 2);
tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 3), 3);
return tmp;
Packet16f tmp = _mm512_castps128_ps512(ploadu<Packet4f>(from));
const Packet16i scatter_mask = _mm512_set_epi32(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0);
return _mm512_permutexvar_ps(scatter_mask, tmp);
}
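The permutexvar index pattern {0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3} amounts to this reference loop:

void ploadquad_ref(const float* from, float* to /* 16 entries */) {
  for (int i = 0; i < 16; ++i)
    to[i] = from[i / 4];  // each source float broadcast to a group of four lanes
}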
// Loads 2 doubles from memory and returns the packet
// {a0, a0, a0, a0, a1, a1, a1, a1}
template <>
EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {
Packet8d tmp = _mm512_undefined_pd();
Packet2d tmp0 = _mm_load_pd1(from);
Packet2d tmp1 = _mm_load_pd1(from + 1);
Packet4d lane0 = _mm256_broadcastsd_pd(tmp0);
Packet4d lane1 = _mm256_broadcastsd_pd(tmp1);
__m256d lane0 = _mm256_set1_pd(*from);
__m256d lane1 = _mm256_set1_pd(*(from+1));
__m512d tmp = _mm512_undefined_pd();
tmp = _mm512_insertf64x4(tmp, lane0, 0);
return _mm512_insertf64x4(tmp, lane1, 1);
}
@ -565,7 +539,7 @@ EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
template <>
EIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,
Index stride) {
Packet16i stride_vector = _mm512_set1_epi32(stride);
Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
Packet16i stride_multiplier =
_mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
@ -575,7 +549,7 @@ EIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,
template <>
EIGEN_DEVICE_FUNC inline Packet8d pgather<double, Packet8d>(const double* from,
Index stride) {
Packet8i stride_vector = _mm256_set1_epi32(stride);
Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));
Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
@ -586,7 +560,7 @@ template <>
EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to,
const Packet16f& from,
Index stride) {
Packet16i stride_vector = _mm512_set1_epi32(stride);
Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
Packet16i stride_multiplier =
_mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
@ -596,7 +570,7 @@ template <>
EIGEN_DEVICE_FUNC inline void pscatter<double, Packet8d>(double* to,
const Packet8d& from,
Index stride) {
Packet8i stride_vector = _mm256_set1_epi32(stride);
Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));
Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
_mm512_i32scatter_pd(to, indices, from, 8);
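Reference semantics for the strided gather/scatter above, after the stride has been narrowed with convert_index<int>:

void pgather_ref(const float* from, float* to, int stride) {
  for (int i = 0; i < 16; ++i)
    to[i] = from[i * stride];  // lane i reads element i*stride
}

void pscatter_ref(const float* from, float* to, int stride) {
  for (int i = 0; i < 16; ++i)
    to[i * stride] = from[i];  // lane i writes element i*stride
}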
@ -660,8 +634,8 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
// AVX512F does not define _mm512_extractf32x8_ps to extract __m256 from __m512
#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
__m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0) __m256 OUTPUT##_1 = \
_mm512_extractf32x8_ps(INPUT, 1)
__m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \
__m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)
#else
#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
__m256 OUTPUT##_0 = _mm256_insertf128_ps( \
@ -674,17 +648,136 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTA, 0); \
OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTB, 1);
OUTPUT = _mm512_insertf32x8(_mm512_castps256_ps512(INPUTA), INPUTB, 1);
#else
#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
OUTPUT = _mm512_undefined_ps(); \
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 0), 0); \
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
#endif
template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f*
vecs)
template <>
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
__m256 lane0 = _mm512_extractf32x8_ps(a, 0);
__m256 lane1 = _mm512_extractf32x8_ps(a, 1);
Packet8f x = _mm256_add_ps(lane0, lane1);
return predux<Packet8f>(x);
#else
__m128 lane0 = _mm512_extractf32x4_ps(a, 0);
__m128 lane1 = _mm512_extractf32x4_ps(a, 1);
__m128 lane2 = _mm512_extractf32x4_ps(a, 2);
__m128 lane3 = _mm512_extractf32x4_ps(a, 3);
__m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3));
sum = _mm_hadd_ps(sum, sum);
sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
return _mm_cvtss_f32(sum);
#endif
}
template <>
EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
__m256d lane0 = _mm512_extractf64x4_pd(a, 0);
__m256d lane1 = _mm512_extractf64x4_pd(a, 1);
__m256d sum = _mm256_add_pd(lane0, lane1);
__m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0)));
}
template <>
EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
return padd(lane0, lane1);
#else
Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
Packet4f sum0 = padd(lane0, lane2);
Packet4f sum1 = padd(lane1, lane3);
return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);
#endif
}
template <>
EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
Packet4d res = padd(lane0, lane1);
return res;
}
template <>
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
//#ifdef EIGEN_VECTORIZE_AVX512DQ
#if 0
Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
Packet8f res = pmul(lane0, lane1);
res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
#else
__m128 lane0 = _mm512_extractf32x4_ps(a, 0);
__m128 lane1 = _mm512_extractf32x4_ps(a, 1);
__m128 lane2 = _mm512_extractf32x4_ps(a, 2);
__m128 lane3 = _mm512_extractf32x4_ps(a, 3);
__m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
#endif
}
template <>
EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
__m256d lane0 = _mm512_extractf64x4_pd(a, 0);
__m256d lane1 = _mm512_extractf64x4_pd(a, 1);
__m256d res = pmul(lane0, lane1);
res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
__m128 lane0 = _mm512_extractf32x4_ps(a, 0);
__m128 lane1 = _mm512_extractf32x4_ps(a, 1);
__m128 lane2 = _mm512_extractf32x4_ps(a, 2);
__m128 lane3 = _mm512_extractf32x4_ps(a, 3);
__m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}
template <>
EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
__m256d lane0 = _mm512_extractf64x4_pd(a, 0);
__m256d lane1 = _mm512_extractf64x4_pd(a, 1);
__m256d res = _mm256_min_pd(lane0, lane1);
res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
__m128 lane0 = _mm512_extractf32x4_ps(a, 0);
__m128 lane1 = _mm512_extractf32x4_ps(a, 1);
__m128 lane2 = _mm512_extractf32x4_ps(a, 2);
__m128 lane3 = _mm512_extractf32x4_ps(a, 3);
__m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}
template <>
EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
__m256d lane0 = _mm512_extractf64x4_pd(a, 0);
__m256d lane1 = _mm512_extractf64x4_pd(a, 1);
__m256d res = _mm256_max_pd(lane0, lane1);
res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f* vecs)
{
EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
@ -874,173 +967,6 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
return _mm512_insertf64x4(final_output, final_1, 1);
}
template <>
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
//#ifdef EIGEN_VECTORIZE_AVX512DQ
#if 0
Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
Packet8f sum = padd(lane0, lane1);
Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1));
tmp0 = _mm256_hadd_ps(tmp0, tmp0);
return pfirst(_mm256_hadd_ps(tmp0, tmp0));
#else
Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3));
sum = _mm_hadd_ps(sum, sum);
sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
return pfirst(sum);
#endif
}
template <>
EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
Packet4d sum = padd(lane0, lane1);
Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
return pfirst(_mm256_hadd_pd(tmp0, tmp0));
}
template <>
EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
return padd(lane0, lane1);
#else
Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
Packet4f sum0 = padd(lane0, lane2);
Packet4f sum1 = padd(lane1, lane3);
return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);
#endif
}
template <>
EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
Packet4d res = padd(lane0, lane1);
return res;
}
template <>
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
//#ifdef EIGEN_VECTORIZE_AVX512DQ
#if 0
Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
Packet8f res = pmul(lane0, lane1);
res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
#else
Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
#endif
}
template <>
EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
Packet4d res = pmul(lane0, lane1);
res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}
template <>
EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
Packet4d res = _mm256_min_pd(lane0, lane1);
res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}
template <>
EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
Packet4d res = _mm256_max_pd(lane0, lane1);
res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
template <int Offset>
struct palign_impl<Offset, Packet16f> {
static EIGEN_STRONG_INLINE void run(Packet16f& first,
const Packet16f& second) {
if (Offset != 0) {
__m512i first_idx = _mm512_set_epi32(
Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
__m512i second_idx =
_mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
Offset - 5, Offset - 6, Offset - 7, Offset - 8,
Offset - 9, Offset - 10, Offset - 11, Offset - 12,
Offset - 13, Offset - 14, Offset - 15, Offset - 16);
unsigned short mask = 0xFFFF;
mask <<= (16 - Offset);
first = _mm512_permutexvar_ps(first_idx, first);
Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
first = _mm512_mask_blend_ps(mask, first, tmp);
}
}
};
template <int Offset>
struct palign_impl<Offset, Packet8d> {
static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
if (Offset != 0) {
__m512i first_idx = _mm512_set_epi32(
0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
__m512i second_idx = _mm512_set_epi32(
0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
unsigned char mask = 0xFF;
mask <<= (8 - Offset);
first = _mm512_permutexvar_pd(first_idx, first);
Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
first = _mm512_mask_blend_pd(mask, first, tmp);
}
}
};
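The permute-and-blend pair implements a packet shift: elements Offset..15 of first followed by elements 0..Offset-1 of second, with the mask (0xFFFF << (16-Offset)) selecting which slots take the permuted second operand. A scalar model of the float case (illustrative):

    void palign_ref(float first[16], const float second[16], int Offset) {
      float out[16];
      for (int i = 0; i < 16; ++i)
        out[i] = (i < 16 - Offset) ? first[i + Offset]         // mask bit clear
                                   : second[i + Offset - 16];  // mask bit set
      for (int i = 0; i < 16; ++i) first[i] = out[i];
    }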
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
@ -1302,13 +1228,76 @@ EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& /*ifPacket*/,
return Packet16f();
}
template <>
EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& /*ifPacket*/,
const Packet8d& /*thenPacket*/,
const Packet8d& /*elsePacket*/) {
assert(false && "To be implemented");
return Packet8d();
EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket,
const Packet8d& thenPacket,
const Packet8d& elsePacket) {
__mmask8 m = (ifPacket.select[0] )
| (ifPacket.select[1]<<1)
| (ifPacket.select[2]<<2)
| (ifPacket.select[3]<<3)
| (ifPacket.select[4]<<4)
| (ifPacket.select[5]<<5)
| (ifPacket.select[6]<<6)
| (ifPacket.select[7]<<7);
return _mm512_mask_blend_pd(m, elsePacket, thenPacket);
}
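The OR chain packs the eight selector entries into one mask bit each, and _mm512_mask_blend_pd picks thenPacket wherever a bit is set. A scalar model (assumes each select[i] is 0 or 1):

    void pblend_ref(const int select[8], const double thenP[8],
                    const double elseP[8], double out[8]) {
      for (int i = 0; i < 8; ++i)
        out[i] = select[i] ? thenP[i] : elseP[i];  // bit i of m mirrors select[i]
    }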
template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
return _mm512_cvttps_epi32(a);
}
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
return _mm512_cvtepi32_ps(a);
}
template <int Offset>
struct palign_impl<Offset, Packet16f> {
static EIGEN_STRONG_INLINE void run(Packet16f& first,
const Packet16f& second) {
if (Offset != 0) {
__m512i first_idx = _mm512_set_epi32(
Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
__m512i second_idx =
_mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
Offset - 5, Offset - 6, Offset - 7, Offset - 8,
Offset - 9, Offset - 10, Offset - 11, Offset - 12,
Offset - 13, Offset - 14, Offset - 15, Offset - 16);
unsigned short mask = 0xFFFF;
mask <<= (16 - Offset);
first = _mm512_permutexvar_ps(first_idx, first);
Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
first = _mm512_mask_blend_ps(mask, first, tmp);
}
}
};
template <int Offset>
struct palign_impl<Offset, Packet8d> {
static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
if (Offset != 0) {
__m512i first_idx = _mm512_set_epi32(
0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
__m512i second_idx = _mm512_set_epi32(
0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
unsigned char mask = 0xFF;
mask <<= (8 - Offset);
first = _mm512_permutexvar_pd(first_idx, first);
Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
first = _mm512_mask_blend_pd(mask, first, tmp);
}
}
};
} // end namespace internal
} // end namespace Eigen

View File

@ -42,6 +42,7 @@
#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
#endif
#include <sstream>
namespace Eigen {

View File

@ -230,7 +230,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2&
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 + a2)));
return Eigen::half(__float2half_rn(a1 + a2));
#endif
}
@ -264,7 +264,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const ha
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 * a2)));
return Eigen::half(__float2half_rn(a1 * a2));
#endif
}

View File

@ -768,7 +768,7 @@ struct scalar_sign_op<Scalar,true> {
if (aa==real_type(0))
return Scalar(0);
aa = real_type(1)/aa;
return Scalar(real(a)*aa, imag(a)*aa );
return Scalar(a.real()*aa, a.imag()*aa );
}
//TODO
//template <typename Packet>
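The functor computes sign(z) = z/|z| while avoiding a full complex division; a hedged sketch with std::complex, where aa plays the role of abs(a) computed just above the hunk:

    #include <complex>
    std::complex<double> sign_ref(std::complex<double> a) {
      double aa = std::abs(a);
      if (aa == 0.0) return {0.0, 0.0};
      double s = 1.0 / aa;                  // one real reciprocal and two real
      return {a.real() * s, a.imag() * s};  // multiplies instead of a division
    }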

View File

@ -115,7 +115,8 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// registers. However, once the latency is hidden there is no point in
// increasing the value of k, so we'll cap it at 320 (value determined
// experimentally).
const Index k_cache = (numext::mini<Index>)((l1-ksub)/kdiv, 320);
// To keep k from vanishing, we make k_cache at least as large as kr
const Index k_cache = numext::maxi<Index>(kr, (numext::mini<Index>)((l1-ksub)/kdiv, 320));
if (k_cache < k) {
k = k_cache - (k_cache % kr);
eigen_internal_assert(k > 0);
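The fix clamps the K-direction cache block from below so that rounding down to a multiple of kr can never yield k == 0. A sketch of the resulting logic, reusing the hunk's names (l1, ksub, kdiv, kr are assumptions about the enclosing scope):

    long blocked_k(long k, long l1, long ksub, long kdiv, long kr) {
      long k_cache = (l1 - ksub) / kdiv;
      if (k_cache > 320) k_cache = 320;                 // experimental cap from the comment
      if (k_cache < kr)  k_cache = kr;                  // the new lower bound
      if (k_cache < k)   k = k_cache - (k_cache % kr);  // round down to a kr multiple
      return k;                                         // >= kr whenever the clamp applies
    }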
@ -648,8 +649,8 @@ public:
// Vectorized path
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const
{
dest.first = pset1<RealPacket>(real(*b));
dest.second = pset1<RealPacket>(imag(*b));
dest.first = pset1<RealPacket>(numext::real(*b));
dest.second = pset1<RealPacket>(numext::imag(*b));
}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const
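Splitting b into broadcast real and imaginary packets lets the kernel build (x+iy)(u+iv) from real multiply-adds; a scalar model of one coefficient (illustrative, with dest.first broadcasting b_re and dest.second broadcasting b_im):

    #include <complex>
    std::complex<float> cmul_ref(std::complex<float> a, float b_re, float b_im) {
      return { a.real() * b_re - a.imag() * b_im,
               a.real() * b_im + a.imag() * b_re };
    }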

View File

@ -20,8 +20,9 @@ template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride>
{
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
@ -30,7 +31,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
Index rows, Index cols, Index depth,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride,
ResScalar* res, Index resStride,
ResScalar* res, Index resIncr, Index resStride,
ResScalar alpha,
level3_blocking<RhsScalar,LhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
@ -39,8 +40,8 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
general_matrix_matrix_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor>
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
ColMajor,ResInnerStride>
::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info);
}
};
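The row-major specialization only swaps operands: a row-major C is a column-major C^T, and C^T = B^T A^T, so rows/cols and lhs/rhs trade places. A hedged sketch of why that suffices, writing At(k,i) = A(i,k) and Bt(j,k) = B(k,j):

    // C(i,j)  = sum_k A(i,k)  * B(k,j)    (the product we want)
    // Ct(j,i) = sum_k Bt(j,k) * At(k,i)   (the product run() above computes)
    // Writing Bt*At in column-major lays out exactly C in row-major;
    // flipping each operand's storage-order flag supplies the transposes for free.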
@ -49,8 +50,9 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride>
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
@ -59,17 +61,17 @@ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScala
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride,
ResScalar* _res, Index resStride,
ResScalar* _res, Index resIncr, Index resStride,
ResScalar alpha,
level3_blocking<LhsScalar,RhsScalar>& blocking,
GemmParallelInfo<Index>* info = 0)
{
typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper;
LhsMapper lhs(_lhs, lhsStride);
RhsMapper rhs(_rhs, rhsStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
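The new ResInnerStride/resIncr pair generalizes the result mapper from unit inner stride to the addressing data[i*incr + j*stride]; a minimal sketch of the idea (a hypothetical helper, not Eigen's blas_data_mapper):

    template <typename Scalar>
    struct strided_mapper {
      Scalar* data;
      long stride, incr;  // outer (column) and inner (element) strides
      Scalar& operator()(long i, long j) const {
        return data[j * stride + i * incr];  // incr == 1 recovers the old mapper
      }
    };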
@ -226,7 +228,7 @@ struct gemm_functor
Gemm::run(rows, cols, m_lhs.cols(),
&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
@ -428,7 +430,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
lazyproduct::evalTo(dst, lhs, rhs);
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());
else
{
dst.setZero();
@ -440,7 +442,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
lazyproduct::addTo(dst, lhs, rhs);
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());
else
scaleAndAddTo(dst,lhs, rhs, Scalar(1));
}
@ -449,7 +451,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
lazyproduct::subTo(dst, lhs, rhs);
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());
else
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
}
@ -476,7 +478,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
Index,
LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,
Dest::InnerStrideAtCompileTime>,
ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);

View File

@ -25,51 +25,54 @@ namespace internal {
**********************************************************************/
// forward declarations (defined at the end of this file)
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
struct tribb_kernel;
/* Optimized matrix-matrix product evaluating only one triangular half */
template <typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int UpLo, int Version = Specialized>
int ResStorageOrder, int ResInnerStride, int UpLo, int Version = Specialized>
struct general_matrix_matrix_triangular_product;
// as usual, if the result is row-major we transpose the product
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,UpLo,Version>
{
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr, Index resStride,
const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor, UpLo==Lower?Upper:Lower>
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
ColMajor, ResInnerStride, UpLo==Lower?Upper:Lower>
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking);
}
};
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,UpLo,Version>
{
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
const RhsScalar* _rhs, Index rhsStride,
ResScalar* _res, Index resIncr, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc();
Index mc = (std::min)(size,blocking.mc());
@ -87,7 +90,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, ResInnerStride, UpLo> sybb;
for(Index k2=0; k2<depth; k2+=kc)
{
@ -110,8 +113,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,
(std::min)(size,i2), alpha, -1, -1, 0, 0);
sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
sybb(_res+resStride*i2 + resIncr*i2, resIncr, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
if (UpLo==Upper)
{
@ -133,7 +135,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
// while the triangular block overlapping the diagonal is evaluated into a
// small temporary buffer which is then accumulated into the result using a
// triangular traversal.
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
struct tribb_kernel
{
typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
@ -142,11 +144,13 @@ struct tribb_kernel
enum {
BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
};
void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
{
typedef blas_data_mapper<ResScalar, Index, ColMajor> ResMapper;
ResMapper res(_res, resStride);
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned> BufferMapper;
ResMapper res(_res, resStride, resIncr);
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel1;
gebp_kernel<LhsScalar, RhsScalar, Index, BufferMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel2;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));
@ -158,31 +162,32 @@ struct tribb_kernel
const RhsScalar* actual_b = blockB+j*depth;
if(UpLo==Upper)
gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
// selfadjoint micro block
{
Index i = j;
buffer.setZero();
// 1 - apply the kernel on the temporary buffer
gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
// 2 - triangular accumulation
for(Index j1=0; j1<actualBlockSize; ++j1)
{
ResScalar* r = &res(i, j + j1);
typename ResMapper::LinearMapper r = res.getLinearMapper(i,j+j1);
for(Index i1=UpLo==Lower ? j1 : 0;
UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
r[i1] += buffer(i1,j1);
r(i1) += buffer(i1,j1);
}
}
if(UpLo==Lower)
{
Index i = j+actualBlockSize;
gebp_kernel(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i,
depth, actualBlockSize, alpha, -1, -1, 0, 0);
gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i,
depth, actualBlockSize, alpha, -1, -1, 0, 0);
}
}
}
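The diagonal block is computed densely into the BlockSize-square buffer, and only the requested triangle is accumulated back into the result. A scalar sketch of step 2 for the Lower case (illustrative names; buf is column-major with leading dimension n):

    void accumulate_lower(double* res, long resStride, const double* buf,
                          long n /*actualBlockSize*/, long i0, long j0) {
      for (long j1 = 0; j1 < n; ++j1)
        for (long i1 = j1; i1 < n; ++i1)  // Lower: entries with i1 >= j1 only
          res[(j0 + j1) * resStride + (i0 + i1)] += buf[j1 * n + i1];
    }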
@ -286,11 +291,12 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo&(Lower|Upper)>
::run(size, depth,
&actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
&actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? mat.innerStride() : mat.outerStride() ) : 0),
mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
}
};

View File

@ -40,7 +40,7 @@ namespace internal {
template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>
struct general_matrix_matrix_rankupdate :
general_matrix_matrix_triangular_product<
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,1,UpLo,BuiltIn> {};
// try to go to BLAS specialization
@ -48,9 +48,9 @@ struct general_matrix_matrix_rankupdate :
template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs, int UpLo> \
struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,1,UpLo,Specialized> { \
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
{ \
if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
@ -59,8 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
general_matrix_matrix_triangular_product<Index, \
Scalar, LhsStorageOrder, ConjugateLhs, \
Scalar, RhsStorageOrder, ConjugateRhs, \
ColMajor, UpLo, BuiltIn> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
ColMajor, 1, UpLo, BuiltIn> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resIncr,resStride,alpha,blocking); \
} \
} \
};

View File

@ -51,20 +51,22 @@ template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1> \
{ \
typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
\
static void run(Index rows, Index cols, Index depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, \
level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
GemmParallelInfo<Index>* /*info = 0*/) \
{ \
using std::conj; \
\
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char transa, transb; \
BlasIndex m, n, k, lda, ldb, ldc; \
const EIGTYPE *a, *b; \

View File

@ -17,7 +17,8 @@ namespace internal {
/** \internal */
inline void manage_multi_threading(Action action, int* v)
{
static EIGEN_UNUSED int m_maxThreads = -1;
static int m_maxThreads = -1;
EIGEN_UNUSED_VARIABLE(m_maxThreads);
if(action==SetAction)
{
@ -150,8 +151,10 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
info[i].lhs_start = r0;
info[i].lhs_length = actualBlockRows;
if(transpose) func(c0, actualBlockCols, 0, rows, info);
else func(0, rows, c0, actualBlockCols, info);
if(transpose)
func(c0, actualBlockCols, 0, rows, info);
else
func(0, rows, c0, actualBlockCols, info);
}
#endif
}

View File

@ -277,20 +277,21 @@ struct symm_pack_rhs
template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResStorageOrder>
int ResStorageOrder, int ResInnerStride>
struct product_selfadjoint_matrix;
template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_selfadjoint_matrix<Scalar, Index,
@ -298,33 +299,35 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
ColMajor>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
ColMajor,ResInnerStride>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
}
};
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
Index size = rows;
@ -334,11 +337,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@ -398,26 +401,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
// matrix * selfadjoint product
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
Index size = cols;
@ -425,9 +430,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
typedef gebp_traits<Scalar,Scalar> Traits;
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
ResMapper res(_res,resStride);
ResMapper res(_res,resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@ -503,12 +508,13 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
Dest::InnerStrideAtCompileTime>
::run(
lhs.rows(), rhs.cols(), // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
actualAlpha, blocking // alpha
);
}

View File

@ -44,16 +44,18 @@ namespace internal {
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
{\
\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='L', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
@ -91,15 +93,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
{\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='L', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
{\
\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='R', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
@ -213,15 +219,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
{\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='R', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \

View File

@ -109,10 +109,10 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
internal::general_matrix_matrix_triangular_product<Index,
Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
IsRowMajor ? RowMajor : ColMajor, UpLo>
IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>
::run(size, depth,
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
mat.data(), mat.outerStride(), actualAlpha, blocking);
mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
}
};

View File

@ -45,22 +45,24 @@ template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int Version = Specialized>
int ResStorageOrder, int ResInnerStride,
int Version = Specialized>
struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
ConjugateRhs,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ColMajor>
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
ColMajor, ResInnerStride>
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
}
};
// implements col-major += alpha * op(triangular) * op(general)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
@ -95,20 +98,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
@ -119,10 +123,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@ -235,10 +239,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@ -251,20 +256,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
@ -276,10 +282,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
@ -433,12 +439,12 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
Mode, LhsIsTriangular,
(internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
(internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>
::run(
stripedRows, stripedCols, stripedDepth, // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
actualAlpha, blocking
);

View File

@ -46,7 +46,7 @@ template <typename Scalar, typename Index,
struct product_triangular_matrix_matrix_trmm :
product_triangular_matrix_matrix<Scalar,Index,Mode,
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {};
// try to go to BLAS specialization
@ -55,13 +55,15 @@ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
} \
};
@ -115,8 +117,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit to call TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
@ -124,8 +126,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
@ -232,8 +234,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
/* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
@ -241,8 +243,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \

View File

@ -15,48 +15,48 @@ namespace Eigen {
namespace internal {
// if the rhs is row major, let's transpose the product
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
{
static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
triangular_solve_matrix<
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
NumTraits<Scalar>::IsComplex && Conjugate,
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
::run(size, cols, tri, triStride, _other, otherStride, blocking);
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
}
};
/* Optimized triangular solver with multiple right-hand sides and the triangular matrix on the left
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
typedef blas_data_mapper<Scalar, Index, ColMajor> OtherMapper;
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
TriMapper tri(_tri, triStride);
OtherMapper other(_other, otherStride);
OtherMapper other(_other, otherStride, otherIncr);
typedef gebp_traits<Scalar,Scalar> Traits;
@ -128,19 +128,19 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
{
Scalar b(0);
const Scalar* l = &tri(i,s);
Scalar* r = &other(s,j);
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
for (Index i3=0; i3<k; ++i3)
b += conj(l[i3]) * r[i3];
b += conj(l[i3]) * r(i3);
other(i,j) = (other(i,j) - b)*a;
}
else
{
Scalar b = (other(i,j) *= a);
Scalar* r = &other(s,j);
const Scalar* l = &tri(s,i);
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
for (Index i3=0;i3<rs;++i3)
r[i3] -= b * conj(l[i3]);
r(i3) -= b * conj(l(i3));
}
}
}
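Per column j this is ordinary forward/back substitution, split so the dot product runs only over already-solved entries. A scalar sketch for a lower-triangular, non-unit-diagonal solve (illustrative; L column-major with leading dimension n):

    // Solves L * x = b in place, with b passed in x.
    void trsv_lower_ref(const double* L, double* x, long n) {
      for (long i = 0; i < n; ++i) {
        double b = 0.0;
        for (long k = 0; k < i; ++k)
          b += L[k * n + i] * x[k];        // dot with the solved prefix
        x[i] = (x[i] - b) / L[i * n + i];  // the code multiplies by a = 1/diag instead
      }
    }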
@ -185,28 +185,28 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
/* Optimized triangular solver with multiple left-hand sides and the triangular matrix on the right
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper;
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
LhsMapper lhs(_other, otherStride);
LhsMapper lhs(_other, otherStride, otherIncr);
RhsMapper rhs(_tri, triStride);
typedef gebp_traits<Scalar,Scalar> Traits;
@ -297,24 +297,24 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
{
Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
Scalar* r = &lhs(i2,j);
typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
for (Index k3=0; k3<k; ++k3)
{
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3);
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
for (Index i=0; i<actual_mc; ++i)
r[i] -= a[i] * b;
r(i) -= a(i) * b;
}
if((Mode & UnitDiag)==0)
{
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
for (Index i=0; i<actual_mc; ++i)
r[i] *= inv_rjj;
r(i) *= inv_rjj;
}
}
// pack the just computed part of lhs to A
pack_lhs_panel(blockA, LhsMapper(_other+absolute_j2*otherStride+i2, otherStride),
pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
actualPanelWidth, actual_mc,
actual_kc, j2);
}

View File

@ -40,7 +40,7 @@ namespace internal {
// implements LeftSide op(triangular)^-1 * general
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
@ -51,8 +51,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
eigen_assert(otherIncr == 1); \
BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha_ */ \
@ -99,7 +101,7 @@ EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
// implements RightSide general * op(triangular)^-1
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
@ -110,8 +112,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
eigen_assert(otherIncr == 1); \
BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha_ */ \

@ -31,7 +31,7 @@ template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
int ResStorageOrder, int ResInnerStride>
struct general_matrix_matrix_product;
template<typename Index,
@ -155,13 +155,21 @@ class BlasVectorMapper {
Scalar* m_data;
};
template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
class BlasLinearMapper;
template<typename Scalar, typename Index, int AlignmentType>
class BlasLinearMapper {
class BlasLinearMapper<Scalar,Index,AlignmentType,1> {
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
: m_data(data)
{
EIGEN_ONLY_USED_FOR_DEBUG(incr);
eigen_assert(incr==1);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
@ -188,16 +196,25 @@ class BlasLinearMapper {
};
// Lightweight helper class to access matrix coefficients.
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
class blas_data_mapper {
public:
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
class blas_data_mapper;
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
: m_data(data), m_stride(stride)
{
EIGEN_ONLY_USED_FOR_DEBUG(incr);
eigen_assert(incr==1);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const {
@ -251,6 +268,90 @@ class blas_data_mapper {
const Index m_stride;
};
// Implementation of non-natural increment (i.e. inner-stride != 1)
// The exposed API is not complete yet compared to the Incr==1 case
// because some features make less sense in this case.
template<typename Scalar, typename Index, int AlignmentType, int Incr>
class BlasLinearMapper
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i*m_incr.value()];
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return pgather<Scalar,Packet>(m_data + i*m_incr.value(), m_incr.value());
}
template<typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
}
protected:
Scalar *m_data;
const internal::variable_if_dynamic<Index,Incr> m_incr;
};
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
class blas_data_mapper
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper
getSubMapper(Index i, Index j) const {
return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j), m_incr.value());
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return pgather<Scalar,Packet>(&operator()(i, j),m_incr.value());
}
template <typename PacketT, int AlignmentT>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
}
template<typename SubPacket>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
template<typename SubPacket>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
protected:
Scalar* EIGEN_RESTRICT m_data;
const Index m_stride;
const internal::variable_if_dynamic<Index,Incr> m_incr;
};
// lightweight helper class to access matrix coefficients (const version)
template<typename Scalar, typename Index, int StorageOrder>
class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
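A toy model of the Incr!=1 mappers above, assuming plain double storage (illustrative, not Eigen's internals): coefficient access scales by the runtime increment, and packet loads/stores degenerate into the gathers/scatters (pgather/pscatter) used above.

#include <cstddef>

// Toy stand-in for BlasLinearMapper<Scalar,Index,Alignment,Incr> with
// Incr != 1: coefficients sit incr elements apart, so a "packet" access
// becomes a gather or scatter instead of a contiguous load or store.
struct StridedVectorView {
  double* data;
  std::ptrdiff_t incr;
  double& operator()(std::ptrdiff_t i) const { return data[i * incr]; }
  void load4(std::ptrdiff_t i, double out[4]) const {        // models pgather
    for (int k = 0; k < 4; ++k) out[k] = data[(i + k) * incr];
  }
  void store4(std::ptrdiff_t i, const double in[4]) const {  // models pscatter
    for (int k = 0; k < 4; ++k) data[(i + k) * incr] = in[k];
  }
};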

@ -57,7 +57,10 @@
#if __GNUC__>=6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
#if __GNUC__==7
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#endif
#if defined __NVCC__
@ -80,4 +83,12 @@
#pragma diag_suppress 2737
#endif
#else
// warnings already disabled:
# ifndef EIGEN_WARNINGS_DISABLED_2
# define EIGEN_WARNINGS_DISABLED_2
# elif defined(EIGEN_INTERNAL_DEBUGGING)
# error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!"
# endif
#endif // not EIGEN_WARNINGS_DISABLED

@ -47,11 +47,7 @@ template<typename T> struct NumTraits;
template<typename Derived> struct EigenBase;
template<typename Derived> class DenseBase;
template<typename Derived> class PlainObjectBase;
template<typename Derived,
int Level = internal::accessors_level<Derived>::value >
class DenseCoeffsBase;
template<typename Derived, int Level> class DenseCoeffsBase;
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |

@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3
#define EIGEN_MINOR_VERSION 7
#define EIGEN_MINOR_VERSION 9
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
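Since EIGEN_VERSION_AT_LEAST is part of Eigen's public macro surface, downstream code can gate on the bumped patch level; a small usage sketch:

#include <Eigen/Core>

#if EIGEN_VERSION_AT_LEAST(3,3,9)
  // may rely on fixes shipped with 3.3.9 (this branch is taken after this update)
#else
  // fallback paths for an older packaged Eigen
#endif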
@ -380,7 +380,8 @@
#if EIGEN_MAX_CPP_VER>=11 && \
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)))
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \
|| (EIGEN_COMP_MSVC >= 1900) )
#define EIGEN_HAS_C99_MATH 1
#else
#define EIGEN_HAS_C99_MATH 0
@ -396,6 +397,20 @@
#endif
#endif
// Does the compiler support type_traits?
// - full support of type traits was added only to GCC 5.1.0.
// - 20150626 corresponds to the last release of 4.x libstdc++
#ifndef EIGEN_HAS_TYPE_TRAITS
#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \
&& ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \
&& ((!defined(__GLIBCXX__)) || __GLIBCXX__ > 20150626)
#define EIGEN_HAS_TYPE_TRAITS 1
#define EIGEN_INCLUDE_TYPE_TRAITS
#else
#define EIGEN_HAS_TYPE_TRAITS 0
#endif
#endif
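A sketch of how such a feature macro is consumed downstream (hypothetical client code, assuming a C++11 build so that static_assert is available):

#include <Eigen/Core>

#if EIGEN_HAS_TYPE_TRAITS
#include <type_traits>
enum Dummy { DummyValue };
static_assert(std::is_enum<Dummy>::value, "full <type_traits> is usable");
#endif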
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
@ -835,11 +850,48 @@ namespace Eigen {
#endif
/**
* \internal
* \brief Macro to explicitly define the default copy constructor.
* This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden.
*/
#if EIGEN_HAS_CXX11
#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) EIGEN_DEVICE_FUNC CLASS(const CLASS&) = default;
#else
#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS)
#endif
/** \internal
* \brief Macro to manually inherit assignment operators.
* This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
* With C++11 or later this also default-implements the copy-constructor
*/
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived)
/** \internal
* \brief Macro to manually define default constructors and destructors.
* This is necessary when the copy constructor is re-defined.
* For empty helper classes this should usually be protected, to avoid accidentally creating empty objects.
*
* Hiding the default destructor leads to problems in C++03 mode together with boost::multiprecision
*/
#if EIGEN_HAS_CXX11
#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
EIGEN_DEVICE_FUNC Derived() = default; \
EIGEN_DEVICE_FUNC ~Derived() = default;
#else
#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
EIGEN_DEVICE_FUNC Derived() {}; \
/* EIGEN_DEVICE_FUNC ~Derived() {}; */
#endif
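The deprecation being silenced here is standard C++11 behavior: a user-declared copy-assignment operator makes the implicitly generated copy constructor deprecated. A minimal reproduction with a hypothetical class name:

struct Assignable {
  Assignable() {}
  Assignable& operator=(const Assignable&) { return *this; }
  // Without the next line, copying an Assignable triggers -Wdeprecated-copy
  // on recent compilers, because a user-declared operator= deprecates the
  // implicit copy constructor. This is what
  // EIGEN_DEFAULT_COPY_CONSTRUCTOR(Assignable) expands to under C++11
  // (modulo EIGEN_DEVICE_FUNC).
  Assignable(const Assignable&) = default;
};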
/**
* Just a side note. Commenting within defines works only by documenting

@ -97,6 +97,9 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; };
template<> struct is_arithmetic<signed long> { enum { value = true }; };
template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
#if EIGEN_HAS_CXX11
using std::is_integral;
#else
template<typename T> struct is_integral { enum { value = false }; };
template<> struct is_integral<bool> { enum { value = true }; };
template<> struct is_integral<char> { enum { value = true }; };
@ -108,6 +111,11 @@ template<> struct is_integral<signed int> { enum { value = true }; };
template<> struct is_integral<unsigned int> { enum { value = true }; };
template<> struct is_integral<signed long> { enum { value = true }; };
template<> struct is_integral<unsigned long> { enum { value = true }; };
#if EIGEN_COMP_MSVC
template<> struct is_integral<signed __int64> { enum { value = true }; };
template<> struct is_integral<unsigned __int64>{ enum { value = true }; };
#endif
#endif
#if EIGEN_HAS_CXX11
using std::make_unsigned;
@ -531,4 +539,30 @@ bool not_equal_strict(const double& x,const double& y) { return std::not_equal_t
} // end namespace Eigen
// Define portable (u)int{32,64} types
#if EIGEN_HAS_CXX11
#include <cstdint>
namespace Eigen {
namespace numext {
typedef std::uint32_t uint32_t;
typedef std::int32_t int32_t;
typedef std::uint64_t uint64_t;
typedef std::int64_t int64_t;
}
}
#else
// Without C++11, all compilers able to compile Eigen also
// provide the C99 stdint.h header file.
#include <stdint.h>
namespace Eigen {
namespace numext {
typedef ::uint32_t uint32_t;
typedef ::int32_t int32_t;
typedef ::uint64_t uint64_t;
typedef ::int64_t int64_t;
}
}
#endif
#endif // EIGEN_META_H
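Either branch yields the same spelling for consumers; a short hypothetical usage:

#include <Eigen/Core>

// Fixed-width integers are reachable through Eigen::numext in both
// C++03 (<stdint.h>) and C++11 (<cstdint>) builds.
Eigen::numext::uint32_t mask  = 0xFFFFFFFFu;
Eigen::numext::int64_t  count = -1;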

@ -1,4 +1,8 @@
#ifdef EIGEN_WARNINGS_DISABLED
#ifdef EIGEN_WARNINGS_DISABLED_2
// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet!
# undef EIGEN_WARNINGS_DISABLED_2
#elif defined(EIGEN_WARNINGS_DISABLED)
#undef EIGEN_WARNINGS_DISABLED
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
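Together with the guard added to DisableStupidWarnings.h above, the pair now implements an include-depth counter capped at two. A condensed sketch of the state machine, with the macro names shortened for readability:

// DisableStupidWarnings.h, condensed:
#ifdef DISABLED                 // nested include: just remember the extra level
#  ifndef DISABLED_2
#    define DISABLED_2
#  endif                        // a third nesting level is flagged as an error
#else                           // first include: push/disable warnings
#  define DISABLED
   /* #pragma warning(push) ... disable ... */
#endif

// ReenableStupidWarnings.h, condensed:
#ifdef DISABLED_2               // leaving the nested level: do not pop yet
#  undef DISABLED_2
#elif defined(DISABLED)         // leaving the outer level: pop/restore warnings
#  undef DISABLED
   /* #pragma warning(pop) ... */
#endif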

@ -34,6 +34,20 @@ inline IndexDest convert_index(const IndexSrc& idx) {
return IndexDest(idx);
}
// true if T can be considered as an integral index (i.e., an integral type or enum)
template<typename T> struct is_valid_index_type
{
enum { value =
#if EIGEN_HAS_TYPE_TRAITS
internal::is_integral<T>::value || std::is_enum<T>::value
#elif EIGEN_COMP_MSVC
internal::is_integral<T>::value || __is_enum(T)
#else
// without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index.
internal::is_convertible<T,Index>::value && !internal::is_same<T,float>::value && !is_same<T,double>::value
#endif
};
};
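Under this trait, integral types and enums qualify as indices while floating-point types do not. Hypothetical compile-time checks, assuming a C++11 build:

#include <Eigen/Core>

enum MyIndexLike { SomeValue = 3 };
static_assert( Eigen::internal::is_valid_index_type<int>::value,         "integral types qualify");
static_assert( Eigen::internal::is_valid_index_type<MyIndexLike>::value, "enums qualify");
static_assert(!Eigen::internal::is_valid_index_type<float>::value,       "floating point does not");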
// promote_scalar_arg is a helper used in operations between an expression and a scalar, like:
// expression * scalar
@ -90,6 +104,9 @@ class no_assignment_operator
{
private:
no_assignment_operator& operator=(const no_assignment_operator&);
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(no_assignment_operator)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(no_assignment_operator)
};
/** \internal return the index type with the largest number of bits */

@ -300,10 +300,13 @@ typename ComplexSchur<MatrixType>::ComplexScalar ComplexSchur<MatrixType>::compu
ComplexScalar trace = t.coeff(0,0) + t.coeff(1,1);
ComplexScalar eival1 = (trace + disc) / RealScalar(2);
ComplexScalar eival2 = (trace - disc) / RealScalar(2);
if(numext::norm1(eival1) > numext::norm1(eival2))
RealScalar eival1_norm = numext::norm1(eival1);
RealScalar eival2_norm = numext::norm1(eival2);
// A division by zero can only occur if eival1==eival2==0.
// In this case, det==0, and all we have to do is to check that eival2_norm!=0
if(eival1_norm > eival2_norm)
eival2 = det / eival1;
else
else if(eival2_norm!=RealScalar(0))
eival1 = det / eival2;
// choose the eigenvalue closest to the bottom entry of the diagonal
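The guard follows from the algebra of the trailing 2x2 block T: its eigenvalues are

\[
\lambda_{1,2} = \tfrac{1}{2}\left(\operatorname{tr}T \pm \sqrt{(\operatorname{tr}T)^2 - 4\det T}\right),
\qquad \lambda_1 \lambda_2 = \det T,
\]

so the smaller-magnitude root is recovered more accurately as det T divided by the larger one; when both norms vanish, det T = 0 and the division must be skipped, which is exactly what the added eival2_norm!=0 test ensures.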

@ -236,7 +236,7 @@ template<typename _MatrixType> class RealSchur
typedef Matrix<Scalar,3,1> Vector3s;
Scalar computeNormOfT();
Index findSmallSubdiagEntry(Index iu);
Index findSmallSubdiagEntry(Index iu, const Scalar& considerAsZero);
void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
@ -302,12 +302,16 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
Index totalIter = 0; // iteration count for whole matrix
Scalar exshift(0); // sum of exceptional shifts
Scalar norm = computeNormOfT();
// sub-diagonal entries smaller than considerAsZero will be treated as zero.
// We use eps^2 to enable more precision in small eigenvalues.
Scalar considerAsZero = numext::maxi<Scalar>( norm * numext::abs2(NumTraits<Scalar>::epsilon()),
(std::numeric_limits<Scalar>::min)() );
if(norm!=Scalar(0))
{
while (iu >= 0)
{
Index il = findSmallSubdiagEntry(iu);
Index il = findSmallSubdiagEntry(iu,considerAsZero);
// Check for convergence
if (il == iu) // One root found
@ -364,14 +368,17 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
/** \internal Look for single small sub-diagonal element and returns its index */
template<typename MatrixType>
inline Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu)
inline Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& considerAsZero)
{
using std::abs;
Index res = iu;
while (res > 0)
{
Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res));
if (abs(m_matT.coeff(res,res-1)) <= NumTraits<Scalar>::epsilon() * s)
s = numext::maxi<Scalar>(s * NumTraits<Scalar>::epsilon(), considerAsZero);
if (abs(m_matT.coeff(res,res-1)) <= s)
break;
res--;
}
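Spelled out, the relaxed deflation test declares the sub-diagonal entry negligible when

\[
|T_{k,k-1}| \le \max\Bigl(\varepsilon\,\bigl(|T_{k-1,k-1}| + |T_{k,k}|\bigr),\ \max\bigl(\|T\|\,\varepsilon^2,\ \texttt{realmin}\bigr)\Bigr),
\]

with \(\varepsilon\) the machine epsilon; the absolute floor \(\|T\|\varepsilon^2\) (clamped to the smallest normalized value) lets tiny sub-diagonal entries deflate, enabling the extra precision in small eigenvalues noted above.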

@ -605,7 +605,8 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
EIGEN_DEVICE_FUNC
static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative)
{
using std::abs;
EIGEN_USING_STD_MATH(sqrt)
EIGEN_USING_STD_MATH(abs)
Index i0;
// Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal):
mat.diagonal().cwiseAbs().maxCoeff(&i0);
@ -616,8 +617,8 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
VectorType c0, c1;
n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm();
n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm();
if(n0>n1) res = c0/std::sqrt(n0);
else res = c1/std::sqrt(n1);
if(n0>n1) res = c0/sqrt(n0);
else res = c1/sqrt(n1);
return true;
}

@ -169,20 +169,38 @@ class QuaternionBase : public RotationBase<Derived, 3>
/** return the result vector of \a v through the rotation*/
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
#ifdef EIGEN_PARSED_BY_DOXYGEN
/** \returns \c *this with scalar type casted to \a NewScalarType
*
* Note that if \a NewScalarType is equal to the current scalar type of \c *this
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const;
#else
template<typename NewScalarType>
EIGEN_DEVICE_FUNC inline
typename internal::enable_if<internal::is_same<Scalar,NewScalarType>::value,const Derived&>::type cast() const
{
return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(derived());
return derived();
}
template<typename NewScalarType>
EIGEN_DEVICE_FUNC inline
typename internal::enable_if<!internal::is_same<Scalar,NewScalarType>::value,Quaternion<NewScalarType> >::type cast() const
{
return Quaternion<NewScalarType>(coeffs().template cast<NewScalarType>());
}
#endif
#ifdef EIGEN_QUATERNIONBASE_PLUGIN
# include EIGEN_QUATERNIONBASE_PLUGIN
#endif
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(QuaternionBase)
EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(QuaternionBase)
};
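The two enable_if overloads preserve the documented behavior of cast(); a usage sketch:

#include <Eigen/Geometry>

void cast_demo() {
  Eigen::Quaterniond qd(1.0, 0.0, 0.0, 0.0);          // w, x, y, z
  Eigen::Quaternionf qf = qd.cast<float>();           // coefficients converted
  const Eigen::Quaterniond& same = qd.cast<double>(); // same Scalar: const ref to *this, no copy
  (void)qf; (void)same;
}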
/***************************************************************************

gtsam/3rdparty/Eigen/Eigen/src/Geometry/Scaling.h (vendored; file mode changed from executable to normal)
@ -14,7 +14,7 @@ namespace Eigen {
/** \geometry_module \ingroup Geometry_Module
*
* \class Scaling
* \class UniformScaling
*
* \brief Represents a generic uniform scaling transformation
*

@ -252,11 +252,11 @@ protected:
public:
/** Default constructor without initialization of the meaningful coefficients.
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
* If Mode==Affine or Mode==Isometry, then the last row is set to [0 ... 0 1] */
EIGEN_DEVICE_FUNC inline Transform()
{
check_template_params();
internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
internal::transform_make_affine<(int(Mode)==Affine || int(Mode)==Isometry) ? Affine : AffineCompact>::run(m_matrix);
}
EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
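In effect, Isometry-mode transforms now receive the same partial initialization as Affine ones; a short illustration:

#include <Eigen/Geometry>

void ctor_demo() {
  // The bottom row is now initialized to [0 0 0 1] for Isometry mode too;
  // the linear and translation parts remain uninitialized, as before.
  Eigen::Transform<double, 3, Eigen::Isometry> T;   // a.k.a. Eigen::Isometry3d
  // T.matrix().row(3) == (0, 0, 0, 1)
}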

@ -138,12 +138,6 @@ public:
/** \returns the inverse translation (opposite) */
Translation inverse() const { return Translation(-m_coeffs); }
Translation& operator=(const Translation& other)
{
m_coeffs = other.m_coeffs;
return *this;
}
static const Translation Identity() { return Translation(VectorType::Zero()); }
/** \returns \c *this with scalar type casted to \a NewScalarType

@ -87,7 +87,7 @@ struct umeyama_transform_matrix_type
* \f{align*}
* T = \begin{bmatrix} c\mathbf{R} & \mathbf{t} \\ \mathbf{0} & 1 \end{bmatrix}
* \f}
* minimizing the resudiual above. This transformation is always returned as an
* minimizing the residual above. This transformation is always returned as an
* Eigen::Matrix.
*/
template <typename Derived, typename OtherDerived>

@ -519,7 +519,10 @@ void PartialPivLU<MatrixType>::compute()
// the row permutation is stored as int indices, so just to be sure:
eigen_assert(m_lu.rows()<NumTraits<int>::highest());
m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
if(m_lu.cols()>0)
m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
else
m_l1_norm = RealScalar(0);
eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
const Index size = m_lu.rows();

@ -44,7 +44,7 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
static void run(const MatrixType& mat, ResultType& result)
{
ActualMatrixType matrix(mat);
EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
const Packet4f p4f_sign_PNNP = _mm_castsi128_ps(_mm_set_epi32(0x00000000, 0x80000000, 0x80000000, 0x00000000));
// Load the full matrix into registers
__m128 _L1 = matrix.template packet<MatrixAlignment>( 0);
@ -139,7 +139,7 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66)));
rd = _mm_shuffle_ps(rd,rd,0);
rd = _mm_xor_ps(rd, _mm_load_ps((float*)_Sign_PNNP));
rd = _mm_xor_ps(rd, p4f_sign_PNNP);
// iB = C*|B| - D*B#*A
iB = _mm_sub_ps(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB);
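Building the sign mask with _mm_set_epi32 avoids the separate aligned static array. A self-contained sketch of the same XOR sign-flip trick (note that _mm_set_epi32 lists the highest lane first; the {+,-,-,+} PNNP pattern is symmetric, so it reads the same either way):

#include <emmintrin.h>  // SSE2: _mm_set_epi32 / _mm_castsi128_ps

// Flip the sign of lanes 1 and 2 of a float vector by XOR-ing the
// IEEE-754 sign bits, as done for rd above.
static inline __m128 flip_middle_signs(__m128 x) {
  const __m128 sign_PNNP =
      _mm_castsi128_ps(_mm_set_epi32(0x00000000, 0x80000000, 0x80000000, 0x00000000));
  return _mm_xor_ps(x, sign_PNNP);
}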

@ -192,7 +192,8 @@ class PardisoImpl : public SparseSolverBase<Derived>
void pardisoInit(int type)
{
m_type = type;
bool symmetric = std::abs(m_type) < 10;
EIGEN_USING_STD_MATH(abs);
bool symmetric = abs(m_type) < 10;
m_iparm[0] = 1; // No solver default
m_iparm[1] = 2; // use Metis for the ordering
m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??)

@ -769,6 +769,21 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
diagShifted = diag - shift;
if(k!=actual_n-1)
{
// check that after the shift, f(mid) is still negative:
RealScalar midShifted = (right - left) / RealScalar(2);
if(shift==right)
midShifted = -midShifted;
RealScalar fMidShifted = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if(fMidShifted>0)
{
// fMid was erroneous, fix it:
shift = fMidShifted > Literal(0) ? left : right;
diagShifted = diag - shift;
}
}
// initial guess
RealScalar muPrev, muCur;
if (shift == left)
@ -845,11 +860,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
}
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
eigen_internal_assert(fLeft<Literal(0));
#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
#endif
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
if(!(fLeft * fRight<0))
{
@ -859,22 +876,36 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
#endif
eigen_internal_assert(fLeft * fRight < Literal(0));
while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
if(fLeft<Literal(0))
{
RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if (fLeft * fMid < Literal(0))
while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
{
rightShifted = midShifted;
}
else
{
leftShifted = midShifted;
fLeft = fMid;
}
}
RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
eigen_internal_assert((numext::isfinite)(fMid));
muCur = (leftShifted + rightShifted) / Literal(2);
if (fLeft * fMid < Literal(0))
{
rightShifted = midShifted;
}
else
{
leftShifted = midShifted;
fLeft = fMid;
}
}
muCur = (leftShifted + rightShifted) / Literal(2);
}
else
{
// We have a problem as shifting on the left or right gives either a positive or negative value
// at the middle of [left,right]...
// Instead of aborting or entering an infinite loop,
// let's just use the middle as the estimated zero-crossing:
muCur = (right - left) * RealScalar(0.5);
if(shift == right)
muCur = -muCur;
}
}
singVals[k] = shift + muCur;
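Distilled, the restructured logic is a guarded bisection: it iterates only while the bracket is known to be valid (f(left) < 0) and otherwise settles for the midpoint estimate. A standalone sketch under the assumption of a well-behaved secular function f:

#include <algorithm>
#include <cmath>
#include <limits>

// Guarded bisection mirroring the control flow above: shrink [lo, hi]
// while f(lo) < 0, stopping once the bracket is within a few ulps; if the
// initial sign is wrong, fall back to the midpoint.
template <typename F>
double guarded_bisect(F f, double lo, double hi) {
  double flo = f(lo);
  if (flo < 0.0) {
    while (hi - lo > 2.0 * std::numeric_limits<double>::epsilon() *
                         std::max(std::fabs(lo), std::fabs(hi))) {
      const double mid  = 0.5 * (lo + hi);
      const double fmid = f(mid);
      if (flo * fmid < 0.0) hi = mid;
      else { lo = mid; flo = fmid; }
    }
  }
  return 0.5 * (lo + hi);  // midpoint, also the fallback estimate
}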
@ -924,7 +955,7 @@ void BDCSVD<MatrixType>::perturbCol0
Index j = i<k ? i : perm(l-1);
prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
if(i!=k && std::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
if(i!=k && numext::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
std::cout << " " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk))
<< ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n";
#endif
@ -934,7 +965,7 @@ void BDCSVD<MatrixType>::perturbCol0
std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
#endif
RealScalar tmp = sqrt(prod);
zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
zhat(k) = col0(k) > Literal(0) ? RealScalar(tmp) : RealScalar(-tmp);
}
}
}

@ -183,7 +183,7 @@ public:
// this temporary is needed to workaround a MSVC issue
Index diagSize = (std::max<Index>)(1,m_diagSize);
return m_usePrescribedThreshold ? m_prescribedThreshold
: diagSize*NumTraits<Scalar>::epsilon();
: RealScalar(diagSize)*NumTraits<Scalar>::epsilon();
}
/** \returns true if \a U (full or thin) is asked for in this SVD decomposition */

@ -608,7 +608,7 @@ public:
}
if(Base::m_diag.size()>0)
dest = Base::m_diag.asDiagonal().inverse() * dest;
dest = Base::m_diag.real().asDiagonal().inverse() * dest;
if (Base::m_matrix.nonZeros()>0) // otherwise I==I
{

@ -156,7 +156,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
/* the nonzero entry L(k,i) */
Scalar l_ki;
if(DoLDLT)
l_ki = yi / m_diag[i];
l_ki = yi / numext::real(m_diag[i]);
else
yi = l_ki = yi / Lx[Lp[i]];

@ -28,7 +28,7 @@ class AmbiVector
typedef typename NumTraits<Scalar>::Real RealScalar;
explicit AmbiVector(Index size)
: m_buffer(0), m_zero(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
: m_buffer(0), m_zero(0), m_size(0), m_end(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
{
resize(size);
}
@ -147,7 +147,8 @@ template<typename _Scalar,typename _StorageIndex>
void AmbiVector<_Scalar,_StorageIndex>::init(int mode)
{
m_mode = mode;
if (m_mode==IsSparse)
// This is only necessary in sparse mode, but we set these unconditionally to avoid some maybe-uninitialized warnings
// if (m_mode==IsSparse)
{
m_llSize = 0;
m_llStart = -1;

@ -49,6 +49,7 @@ template<typename UnaryOp, typename ArgType>
class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator
: public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator
{
protected:
typedef typename XprType::Scalar Scalar;
typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base;
public:
@ -99,6 +100,7 @@ template<typename ViewOp, typename ArgType>
class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator
: public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator
{
protected:
typedef typename XprType::Scalar Scalar;
typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base;
public:

@ -327,7 +327,8 @@ class SparseMatrix
m_outerIndex[j] = newOuterIndex[j];
m_innerNonZeros[j] = innerNNZ;
}
m_outerIndex[m_outerSize] = m_outerIndex[m_outerSize-1] + m_innerNonZeros[m_outerSize-1] + reserveSizes[m_outerSize-1];
if(m_outerSize>0)
m_outerIndex[m_outerSize] = m_outerIndex[m_outerSize-1] + m_innerNonZeros[m_outerSize-1] + reserveSizes[m_outerSize-1];
m_data.resize(m_outerIndex[m_outerSize]);
}

@ -453,7 +453,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
Index r = it.row();
Index c = it.col();
Index ip = perm ? perm[i] : i;
if(Mode==(Upper|Lower))
if(Mode==int(Upper|Lower))
count[StorageOrderMatch ? jp : ip]++;
else if(r==c)
count[ip]++;
@ -486,7 +486,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
StorageIndex jp = perm ? perm[j] : j;
StorageIndex ip = perm ? perm[i] : i;
if(Mode==(Upper|Lower))
if(Mode==int(Upper|Lower))
{
Index k = count[StorageOrderMatch ? jp : ip]++;
dest.innerIndexPtr()[k] = StorageOrderMatch ? ip : jp;

@ -90,6 +90,7 @@ struct unary_evaluator<SparseView<ArgType>, IteratorBased>
class InnerIterator : public EvalIterator
{
protected:
typedef typename XprType::Scalar Scalar;
public:

@ -43,8 +43,8 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu
* Simple example with key steps
* \code
* VectorXd x(n), b(n);
* SparseMatrix<double, ColMajor> A;
* SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<Index> > solver;
* SparseMatrix<double> A;
* SparseLU<SparseMatrix<double>, COLAMDOrdering<int> > solver;
* // fill A and b;
* // Compute the ordering permutation vector from the structural pattern of A
* solver.analyzePattern(A);

@ -98,8 +98,10 @@ namespace std {
{ return deque_base::insert(position,x); }
void insert(const_iterator position, size_type new_size, const value_type& x)
{ deque_base::insert(position, new_size, x); }
#elif defined(_GLIBCXX_DEQUE) && EIGEN_GNUC_AT_LEAST(4,2)
#elif defined(_GLIBCXX_DEQUE) && EIGEN_GNUC_AT_LEAST(4,2) && !EIGEN_GNUC_AT_LEAST(10, 1)
// workaround GCC std::deque implementation
// GCC 10.1 doesn't let us access _Deque_impl _M_impl anymore and we have to
// fall back to the default case
void resize(size_type new_size, const value_type& x)
{
if (new_size < deque_base::size())
@ -108,7 +110,7 @@ namespace std {
deque_base::insert(deque_base::end(), new_size - deque_base::size(), x);
}
#else
// either GCC 4.1 or non-GCC
// either non-GCC or GCC between 4.1 and 10.1
// default implementation which should always work.
void resize(size_type new_size, const value_type& x)
{

@ -119,7 +119,7 @@ OP(const Scalar& s) const { \
return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
} \
EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
OP(const Scalar& s, const Derived& d) { \
OP(const Scalar& s, const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& d) { \
return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
}

@ -112,6 +112,7 @@ void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr,
cr.noalias() -= ai * bi;
ci.noalias() += ar * bi;
ci.noalias() += ai * br;
// [cr ci] += [ar ai] * br + [-ai ar] * bi
}
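The added comment is the familiar real-decomposed complex product: with \(A = A_r + iA_i\) and \(B = B_r + iB_i\),

\[
C_r \mathrel{+}= A_r B_r - A_i B_i, \qquad
C_i \mathrel{+}= A_r B_i + A_i B_r,
\]

which matches the four noalias accumulations above and the block form \([C_r\ C_i] \mathrel{+}= [A_r\ A_i]\,B_r + [-A_i\ A_r]\,B_i\) noted in the comment.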
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci)
@ -240,7 +241,7 @@ int main(int argc, char ** argv)
blas_gemm(a,b,r);
c.noalias() += a * b;
if(!r.isApprox(c)) {
std::cout << r - c << "\n";
std::cout << (r - c).norm() << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
#else
@ -249,7 +250,7 @@ int main(int argc, char ** argv)
gemm(a,b,c);
r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
if(!r.isApprox(c)) {
std::cout << r - c << "\n";
std::cout << (r - c).norm() << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
}

@ -39,9 +39,9 @@ endif()
add_dependencies(blas eigen_blas eigen_blas_static)
install(TARGETS eigen_blas eigen_blas_static
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
if(EIGEN_Fortran_COMPILER_WORKS)

@ -13,28 +13,28 @@ int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const
const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc)
{
// std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*);
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*);
static const functype func[12] = {
// array index: NOTR | (NOTR << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor,1>::run),
// array index: TR | (NOTR << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor,1>::run),
// array index: ADJ | (NOTR << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,1>::run),
0,
// array index: NOTR | (TR << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor,1>::run),
// array index: TR | (TR << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor,1>::run),
// array index: ADJ | (TR << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor,1>::run),
0,
// array index: NOTR | (ADJ << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,1>::run),
// array index: TR | (ADJ << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor,1>::run),
// array index: ADJ | (ADJ << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run),
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor,1>::run),
0
};
@ -71,7 +71,7 @@ int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,*k,1,true);
int code = OP(*opa) | (OP(*opb) << 2);
func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha, blocking, 0);
func[code](*m, *n, *k, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking, 0);
return 0;
}
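The extra literal 1 threaded through every call is the new inner-stride argument for the destination (matching the added ResInnerStride/OtherInnerStride template argument), so the BLAS entry points keep their packed-column semantics. A hypothetical miniature of the op-code table dispatch, assuming the usual NOTR=0 / TR=1 / ADJ=2 encoding of this translation unit:

// Miniature of the function-pointer dispatch used by gemm above.
typedef void (*kernel_t)(int m, int n, int k /* , operands, strides, ... */);

static void k_nn(int, int, int) {}   // NOTR * NOTR
static void k_tn(int, int, int) {}   // TR   * NOTR
static void k_an(int, int, int) {}   // ADJ  * NOTR

static const kernel_t table[12] = {
  k_nn, k_tn, k_an, 0,               // op(b) == NOTR; slot 3 is invalid
  /* the TR and ADJ rows (indices 4..11) are elided in this sketch */
};

inline kernel_t lookup(int opa, int opb) { return table[opa | (opb << 2)]; }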
@ -79,63 +79,63 @@ int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, c
const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb)
{
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
static const functype func[32] = {
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run),\
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor,1>::run),\
0,
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor,1>::run),
0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor,1>::run),
0,
// array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
0
};
@ -163,12 +163,12 @@ int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, c
if(SIDE(*side)==LEFT)
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
func[code](*m, *n, a, *lda, b, *ldb, blocking);
func[code](*m, *n, a, *lda, b, 1, *ldb, blocking);
}
else
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n,1,false);
func[code](*n, *m, a, *lda, b, *ldb, blocking);
func[code](*n, *m, a, *lda, b, 1, *ldb, blocking);
}
if(alpha!=Scalar(1))
@ -184,63 +184,63 @@ int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, c
const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb)
{
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static const functype func[32] = {
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
0,
// array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
0
};
@ -272,12 +272,12 @@ int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, c
if(SIDE(*side)==LEFT)
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, 1, *ldb, alpha, blocking);
}
else
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n,1,false);
func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking);
func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking);
}
return 1;
}
@ -338,12 +338,12 @@ int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, cons
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
if(SIDE(*side)==LEFT)
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor,1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor,1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
else return 0;
else if(SIDE(*side)==RIGHT)
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor,1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor,1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
else return 0;
else
return 0;
@ -359,21 +359,21 @@ int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const
{
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
#if !ISCOMPLEX
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static const functype func[8] = {
// array index: NOTR | (UP << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, 1, Upper>::run),
// array index: TR | (UP << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, 1, Upper>::run),
// array index: ADJ | (UP << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,1, Upper>::run),
0,
// array index: NOTR | (LO << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, 1, Lower>::run),
// array index: TR | (LO << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, 1, Lower>::run),
// array index: ADJ | (LO << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,1, Lower>::run),
0
};
#endif
@ -426,7 +426,7 @@ int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false);
int code = OP(*op) | (UPLO(*uplo) << 2);
func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking);
func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
#endif
return 0;
@ -537,18 +537,18 @@ int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, cons
if(SIDE(*side)==LEFT)
{
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor>
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor>
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor, 1>
::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor,1>
::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
else return 0;
}
else if(SIDE(*side)==RIGHT)
{
if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor>
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor>
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor, 1>
::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);*/
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor,1>
::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
else return 0;
}
else
@ -566,19 +566,19 @@ int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const
{
// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static const functype func[8] = {
// array index: NOTR | (UP << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,1,Upper>::run),
0,
// array index: ADJ | (UP << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,1,Upper>::run),
0,
// array index: NOTR | (LO << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,1,Lower>::run),
0,
// array index: ADJ | (LO << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run),
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,1,Lower>::run),
0
};
@ -620,7 +620,7 @@ int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const
if(*k>0 && alpha!=RealScalar(0))
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false);
func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking);
func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
}
return 0;

View File

@ -677,6 +677,8 @@ macro(ei_set_build_string)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11)
endif()
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-v3.3)
if(EIGEN_BUILD_STRING_SUFFIX)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX})
endif()

View File

@ -19,8 +19,11 @@ include(CheckCXXSourceCompiles)
# notice the std:: is required on some platforms such as QNX
set(find_standard_math_library_test_program
"#include<cmath>
int main() { std::sin(0.0); std::log(0.0f); }")
"
#include<cmath>
int main(int argc, char **){
return int(std::sin(double(argc)) + std::log(double(argc)));
}")
# first try compiling/linking the test program without any linker flags

View File

@ -11,7 +11,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_INTERNAL_DOCUMENTATION "Build internal documentation" OFF)
option(EIGEN_DOC_USE_MATHJAX "Use MathJax for rendering math in HTML docs" ON)
# Set some Doxygen flags
set(EIGEN_DOXY_PROJECT_NAME "Eigen")
@ -19,12 +19,19 @@ set(EIGEN_DOXY_OUTPUT_DIRECTORY_SUFFIX "")
set(EIGEN_DOXY_INPUT "\"${Eigen_SOURCE_DIR}/Eigen\" \"${Eigen_SOURCE_DIR}/doc\"")
set(EIGEN_DOXY_HTML_COLORSTYLE_HUE "220")
set(EIGEN_DOXY_TAGFILES "")
if(EIGEN_INTERNAL_DOCUMENTATION)
set(EIGEN_DOXY_INTERNAL "YES")
else(EIGEN_INTERNAL_DOCUMENTATION)
set(EIGEN_DOXY_INTERNAL "NO")
endif(EIGEN_INTERNAL_DOCUMENTATION)
if (EIGEN_DOC_USE_MATHJAX)
set(EIGEN_DOXY_USE_MATHJAX "YES")
else ()
set(EIGEN_DOXY_USE_MATHJAX "NO")
endif()
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile

View File

@ -75,7 +75,7 @@ namespace Eigen {
static inline Real epsilon() { return 0; }
static inline Real dummy_precision() { return 0; }
static inline Real digits10() { return 0; }
static inline int digits10() { return 0; }
enum {
IsInteger = 0,

View File

@ -736,6 +736,14 @@ EXCLUDE = "${Eigen_SOURCE_DIR}/Eigen/src/Core/products" \
"${Eigen_SOURCE_DIR}/unsupported/doc/examples" \
"${Eigen_SOURCE_DIR}/unsupported/doc/snippets"
# Forward declarations of class templates cause the title of the main page for
# the class template to not contain the template signature. This only happens
# when the \class command is used to document the class. Possibly caused
# by https://github.com/doxygen/doxygen/issues/7698. Confirmed fixed by
# doxygen release 1.8.19.
EXCLUDE += "${Eigen_SOURCE_DIR}/Eigen/src/Core/util/ForwardDeclarations.h"
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
# from the input.
@ -1245,7 +1253,7 @@ FORMULA_TRANSPARENT = YES
# output. When enabled you may also need to install MathJax separately and
# configure the path to it using the MATHJAX_RELPATH option.
USE_MATHJAX = NO
USE_MATHJAX = @EIGEN_DOXY_USE_MATHJAX@
# When MathJax is enabled you need to specify the location relative to the
# HTML output directory using the MATHJAX_RELPATH option. The destination
@ -1257,12 +1265,12 @@ USE_MATHJAX = NO
# However, it is strongly recommended to install a local
# copy of MathJax from http://www.mathjax.org before deployment.
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
MATHJAX_RELPATH = https://cdn.mathjax.org/mathjax/latest
# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
# names that should be enabled during MathJax rendering.
MATHJAX_EXTENSIONS =
MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# When the SEARCHENGINE tag is enabled doxygen will generate a search box
# for the HTML output. The underlying search engine uses javascript
@ -1609,6 +1617,9 @@ PREDEFINED = EIGEN_EMPTY_STRUCT \
EXPAND_AS_DEFINED = EIGEN_MAKE_TYPEDEFS \
EIGEN_MAKE_FIXED_TYPEDEFS \
EIGEN_MAKE_TYPEDEFS_ALL_SIZES \
EIGEN_MAKE_ARRAY_TYPEDEFS \
EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS \
EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES \
EIGEN_CWISE_UNOP_RETURN_TYPE \
EIGEN_CWISE_BINOP_RETURN_TYPE \
EIGEN_CURRENT_STORAGE_BASE_CLASS \

View File

@ -7,14 +7,30 @@ namespace Eigen {
See this \link TopicTemplateKeyword page \endlink.
\section TopicPitfalls_aliasing Aliasing
Don't miss this \link TopicAliasing page \endlink on aliasing,
especially if you got wrong results in statements where the destination appears on the right hand side of the expression.
\section TopicPitfalls_alignment_issue Alignment Issues (runtime assertion)
%Eigen does explicit vectorization, and while that is appreciated by many users, that also leads to some issues in special situations where data alignment is compromised.
Indeed, prior to C++17, C++ does not have quite good enough support for explicit data alignment.
In that case your program hits an assertion failure (that is, a "controlled crash") with a message that tells you to consult this page:
\code
http://eigen.tuxfamily.org/dox/group__TopicUnalignedArrayAssert.html
\endcode
Have a look at \link TopicUnalignedArrayAssert it \endlink and see for yourself if that's something that you can cope with.
It contains detailed information about how to deal with each known cause for that issue.
Now what if you don't care about vectorization and so don't want to be annoyed with these alignment issues? Then read \link getrid how to get rid of them \endlink.
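As a minimal sketch (the \c Camera type below is hypothetical), the typical trigger is a dynamically allocated class holding a fixed-size vectorizable %Eigen member, and the typical cure is the EIGEN_MAKE_ALIGNED_OPERATOR_NEW macro described on that page:
\code
#include <Eigen/Core>

// Hypothetical example: a class with a fixed-size, vectorizable Eigen member.
// Prior to C++17, "new Camera" may return storage that is not aligned enough
// for the vectorized Matrix4f member, triggering the runtime assertion.
struct Camera
{
  Eigen::Matrix4f pose;            // fixed-size vectorizable member
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW  // overloads operator new to enforce alignment
};

Camera *c = new Camera;            // now guaranteed to be suitably aligned
\endcode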
\section TopicPitfalls_auto_keyword C++11 and the auto keyword
In short: do not use the auto keywords with Eigen's expressions, unless you are 100% sure about what you are doing. In particular, do not use the auto keyword as a replacement for a Matrix<> type. Here is an example:
In short: do not use the auto keyword with %Eigen's expressions, unless you are 100% sure about what you are doing. In particular, do not use the auto keyword as a replacement for a \c Matrix<> type. Here is an example:
\code
MatrixXd A, B;
@ -22,23 +38,81 @@ auto C = A*B;
for(...) { ... w = C * v; ...}
\endcode
In this example, the type of C is not a MatrixXd but an abstract expression representing a matrix product and storing references to A and B. Therefore, the product of A*B will be carried out multiple times, once per iteration of the for loop. Moreover, if the coefficients of A or B change during the iteration, then C will evaluate to different values.
In this example, the type of C is not a \c MatrixXd but an abstract expression representing a matrix product and storing references to \c A and \c B.
Therefore, the product of \c A*B will be carried out multiple times, once per iteration of the for loop.
Moreover, if the coefficients of \c A or \c B change during the iteration, then \c C will evaluate to different values.
Here is another example leading to a segfault:
\code
auto C = ((A+B).eval()).transpose();
// do something with C
\endcode
The problem is that eval() returns a temporary object (in this case a MatrixXd) which is then referenced by the Transpose<> expression. However, this temporary is deleted right after the first line, and there the C expression reference a dead object. The same issue might occur when sub expressions are automatically evaluated by Eigen as in the following example:
The problem is that \c eval() returns a temporary object (in this case a \c MatrixXd) which is then referenced by the \c Transpose<> expression.
However, this temporary is deleted right after the first line, and then the \c C expression references a dead object.
One possible fix consists in applying \c eval() on the whole expression:
\code
auto C = (A+B).transpose().eval();
\endcode
The same issue might occur when sub expressions are automatically evaluated by %Eigen as in the following example:
\code
VectorXd u, v;
auto C = u + (A*v).normalized();
// do something with C
\endcode
where the normalized() method has to evaluate the expensive product A*v to avoid evaluating it twice. On the other hand, the following example is perfectly fine:
Here the \c normalized() method has to evaluate the expensive product \c A*v to avoid evaluating it twice.
Again, one possible fix is to call \c .eval() on the whole expression:
\code
auto C = (u + (A*v).normalized()).eval();
\endcode
In this case, C will be a regular VectorXd object.
In this case, \c C will be a regular \c VectorXd object.
Note that DenseBase::eval() is smart enough to avoid copies when the underlying expression is already a plain \c Matrix<>.
\section TopicPitfalls_header_issues Header Issues (failure to compile)
With all libraries, one must check the documentation for which header to include.
The same is true with %Eigen, but slightly worse: with %Eigen, a method in a class may require an additional <code>#include</code> over what the class itself requires!
For example, if you want to use the \c cross() method on a vector (it computes a cross-product) then you need to:
\code
#include<Eigen/Geometry>
\endcode
We try to always document this, but do tell us if we forgot an occurrence.
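For instance, a minimal sketch:
\code
#include <Eigen/Geometry> // cross() requires this header; Eigen/Core is not enough

Eigen::Vector3f x(1,0,0), y(0,1,0);
Eigen::Vector3f z = x.cross(y); // (0,0,1); will not build without the include
\endcode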
\section TopicPitfalls_ternary_operator Ternary operator
In short: avoid the use of the ternary operator <code>(COND ? THEN : ELSE)</code> with %Eigen's expressions for the \c THEN and \c ELSE statements.
To see why, let's consider the following example:
\code
Vector3f A;
A << 1, 2, 3;
Vector3f B = ((1 < 0) ? (A.reverse()) : A);
\endcode
This example will return <code>B = 3, 2, 1</code>. Do you see why?
The reason is that in C++ the type of the \c ELSE statement is inferred from the type of the \c THEN expression such that both match.
Since \c THEN is a <code>Reverse<Vector3f></code>, the \c ELSE statement \c A is converted to a <code>Reverse<Vector3f></code>, and the compiler thus generates:
\code
Vector3f B = ((1 < 0) ? (A.reverse()) : Reverse<Vector3f>(A));
\endcode
In this very particular case, a workaround would be to call \c A.reverse().eval() for the \c THEN statement, but the safest and fastest is really to avoid this ternary operator with %Eigen's expressions and use an if/else construct.
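For completeness, here is a sketch of that workaround; evaluating the \c THEN branch into a plain \c Vector3f makes both branches the same type, so \c A is no longer converted:
\code
Vector3f B = ((1 < 0) ? A.reverse().eval() : A); // B is now 1, 2, 3 as expected
\endcode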
\section TopicPitfalls_pass_by_value Pass-by-value
If you don't know why passing-by-value is wrong with %Eigen, read this \link TopicPassingByValue page \endlink first.
While you may be extremely careful to make sure that all of your code that explicitly uses %Eigen types is pass-by-reference, you have to watch out for templates, which define the argument types at compile time.
If a template has a function that takes arguments pass-by-value, and the relevant template parameter ends up being an %Eigen type, then you will of course have the same alignment problems that you would in an explicitly defined function passing %Eigen types by value.
Using %Eigen types with other third party libraries or even the STL can present the same problem.
<code>boost::bind</code> for example uses pass-by-value to store arguments in the returned functor.
This will of course be a problem.
There are at least two ways around this:
- If the value you are passing is guaranteed to be around for the life of the functor, you can use boost::ref() to wrap the value as you pass it to boost::bind. Generally this is not a solution for values on the stack: if the functor ever gets passed to a lower or independent scope, the object may be gone by the time it is used.
- The other option is to make your functions take a reference-counted pointer like boost::shared_ptr as the argument. This avoids needing to worry about managing the lifetime of the object being passed (a signature-level sketch follows this list).
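The sketch below (with hypothetical function names) contrasts the unsafe and safe signatures:
\code
#include <Eigen/Core>

void transform_bad (Eigen::Vector4f v);        // by value: the copy may be unaligned
void transform_good(const Eigen::Vector4f &v); // by const reference: safe
\endcode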
*/
}

View File

@ -244,7 +244,7 @@ As stated earlier, for a read-write sub-matrix (RW), the evaluation can be done
<td>
\code
sm1.valuePtr(); // Pointer to the values
sm1.innerIndextr(); // Pointer to the indices.
sm1.innerIndexPtr(); // Pointer to the indices.
sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector
\endcode
</td>

View File

@ -2,63 +2,95 @@ namespace Eigen {
/** \page TopicLazyEvaluation Lazy Evaluation and Aliasing
Executive summary: Eigen has intelligent compile-time mechanisms to enable lazy evaluation and removing temporaries where appropriate.
Executive summary: %Eigen has intelligent compile-time mechanisms to enable lazy evaluation and remove temporaries where appropriate.
It will handle aliasing automatically in most cases, for example with matrix products. The automatic behavior can be overridden
manually by using the MatrixBase::eval() and MatrixBase::noalias() methods.
When you write a line of code involving a complex expression such as
\code mat1 = mat2 + mat3 * (mat4 + mat5); \endcode
\code mat1 = mat2 + mat3 * (mat4 + mat5);
\endcode
Eigen determines automatically, for each sub-expression, whether to evaluate it into a temporary variable. Indeed, in certain cases it is better to evaluate immediately a sub-expression into a temporary variable, while in other cases it is better to avoid that.
%Eigen determines automatically, for each sub-expression, whether to evaluate it into a temporary variable. Indeed, in certain cases it is better to evaluate a sub-expression into a temporary variable, while in other cases it is better to avoid that.
A traditional math library without expression templates always evaluates all sub-expressions into temporaries. So with this code,
\code vec1 = vec2 + vec3; \endcode
\code vec1 = vec2 + vec3;
\endcode
a traditional library would evaluate \c vec2 + vec3 into a temporary \c vec4 and then copy \c vec4 into \c vec1. This is of course inefficient: the arrays are traversed twice, so there are a lot of useless load/store operations.
Expression-templates-based libraries can avoid evaluating sub-expressions into temporaries, which in many cases results in large speed improvements. This is called <i>lazy evaluation</i> as an expression is getting evaluated as late as possible, instead of immediately. However, most other expression-templates-based libraries <i>always</i> choose lazy evaluation. There are two problems with that: first, lazy evaluation is not always a good choice for performance; second, lazy evaluation can be very dangerous, for example with matrix products: doing <tt>matrix = matrix*matrix</tt> gives a wrong result if the matrix product is lazy-evaluated, because of the way matrix product works.
Expression-templates-based libraries can avoid evaluating sub-expressions into temporaries, which in many cases results in large speed improvements.
This is called <i>lazy evaluation</i> as an expression is getting evaluated as late as possible.
In %Eigen <b>all expressions are lazy-evaluated</b>.
More precisely, an expression starts to be evaluated once it is assigned to a matrix.
Until then nothing happens beyond constructing the abstract expression tree.
In contrast to most other expression-templates-based libraries, however, <b>%Eigen might choose to evaluate some sub-expressions into temporaries</b>.
There are two reasons for that: first, pure lazy evaluation is not always a good choice for performance; second, pure lazy evaluation can be very dangerous, for example with matrix products: doing <tt>mat = mat*mat</tt> gives a wrong result if the matrix product is directly evaluated within the destination matrix, because of the way the matrix product works.
For these reasons, Eigen has intelligent compile-time mechanisms to determine automatically when to use lazy evaluation, and when on the contrary it should evaluate immediately into a temporary variable.
For these reasons, %Eigen has intelligent compile-time mechanisms to determine automatically which sub-expression should be evaluated into a temporary variable.
So in the basic example,
\code matrix1 = matrix2 + matrix3; \endcode
\code mat1 = mat2 + mat3;
\endcode
Eigen chooses lazy evaluation. Thus the arrays are traversed only once, producing optimized code. If you really want to force immediate evaluation, use \link MatrixBase::eval() eval()\endlink:
%Eigen chooses not to introduce any temporary. Thus the arrays are traversed only once, producing optimized code.
If you really want to force immediate evaluation, use \link MatrixBase::eval() eval()\endlink:
\code matrix1 = (matrix2 + matrix3).eval(); \endcode
\code mat1 = (mat2 + mat3).eval();
\endcode
Here is now a more involved example:
\code matrix1 = -matrix2 + matrix3 + 5 * matrix4; \endcode
\code mat1 = -mat2 + mat3 + 5 * mat4;
\endcode
Eigen chooses lazy evaluation at every stage in that example, which is clearly the correct choice. In fact, lazy evaluation is the "default choice" and Eigen will choose it except in a few circumstances.
Here again %Eigen won't introduce any temporary, thus producing a single <b>fused</b> evaluation loop, which is clearly the correct choice.
<b>The first circumstance</b> in which Eigen chooses immediate evaluation, is when it sees an assignment <tt>a = b;</tt> and the expression \c b has the evaluate-before-assigning \link flags flag\endlink. The most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do
\section TopicLazyEvaluationWhichExpr Which sub-expressions are evaluated into temporaries?
\code matrix = matrix * matrix; \endcode
The default evaluation strategy is to fuse the operations in a single loop, and %Eigen will choose it except in a few circumstances.
Eigen first evaluates <tt>matrix * matrix</tt> into a temporary matrix, and then copies it into the original \c matrix. This guarantees a correct result as we saw above that lazy evaluation gives wrong results with matrix products. It also doesn't cost much, as the cost of the matrix product itself is much higher.
<b>The first circumstance</b> in which %Eigen chooses to evaluate a sub-expression is when it sees an assignment <tt>a = b;</tt> and the expression \c b has the evaluate-before-assigning \link flags flag\endlink.
The most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do
\code mat = mat * mat;
\endcode
%Eigen will evaluate <tt>mat * mat</tt> into a temporary matrix, and then copy it into the original \c mat.
This guarantees a correct result as we saw above that lazy evaluation gives wrong results with matrix products.
It also doesn't cost much, as the cost of the matrix product itself is much higher.
Note that this temporary is introduced at evaluation time only, that is, within operator= in this example.
The expression <tt>mat * mat</tt> still returns an abstract product type.
What if you know that the result does not alias the operand of the product and want to force lazy evaluation? Then use \link MatrixBase::noalias() .noalias()\endlink instead. Here is an example:
\code matrix1.noalias() = matrix2 * matrix2; \endcode
\code mat1.noalias() = mat2 * mat2;
\endcode
Here, since we know that matrix2 is not the same matrix as matrix1, we know that lazy evaluation is not dangerous, so we may force lazy evaluation. Concretely, the effect of noalias() here is to bypass the evaluate-before-assigning \link flags flag\endlink.
Here, since we know that mat2 is not the same matrix as mat1, we know that lazy evaluation is not dangerous, so we may force lazy evaluation. Concretely, the effect of noalias() here is to bypass the evaluate-before-assigning \link flags flag\endlink.
<b>The second circumstance</b> in which Eigen chooses immediate evaluation, is when it sees a nested expression such as <tt>a + b</tt> where \c b is already an expression having the evaluate-before-nesting \link flags flag\endlink. Again, the most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do
<b>The second circumstance</b> in which %Eigen chooses to evaluate a sub-expression, is when it sees a nested expression such as <tt>a + b</tt> where \c b is already an expression having the evaluate-before-nesting \link flags flag\endlink.
Again, the most important example of such an expression is the \link Product matrix product expression\endlink.
For example, when you do
\code matrix1 = matrix2 + matrix3 * matrix4; \endcode
\code mat1 = mat2 * mat3 + mat4 * mat5;
\endcode
the product <tt>matrix3 * matrix4</tt> gets evaluated immediately into a temporary matrix. Indeed, experiments showed that it is often beneficial for performance to evaluate immediately matrix products when they are nested into bigger expressions.
the products <tt>mat2 * mat3</tt> and <tt>mat4 * mat5</tt> get evaluated separately into temporary matrices before being summed up in <tt>mat1</tt>.
Indeed, to be efficient, matrix products need to be evaluated within a destination matrix at hand, and not as simple "dot products".
For small matrices, however, you might want to enforce a "dot-product" based lazy evaluation with lazyProduct() (see the sketch below).
Again, it is important to understand that those temporaries are created at evaluation time only, that is, in operator=.
See TopicPitfalls_auto_keyword for common pitfalls regarding this remark.
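A sketch of such an enforced lazy product (assuming \c S1 does not alias \c S2 or \c S3):
\code
Matrix3f S1, S2, S3;
S1 = S2.lazyProduct(S3); // coefficient-based evaluation, no temporary
\endcode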
<b>The third circumstance</b> in which Eigen chooses immediate evaluation, is when its cost model shows that the total cost of an operation is reduced if a sub-expression gets evaluated into a temporary. Indeed, in certain cases, an intermediate result is sufficiently costly to compute and is reused sufficiently many times, that is worth "caching". Here is an example:
<b>The third circumstance</b> in which %Eigen chooses to evaluate a sub-expression, is when its cost model shows that the total cost of an operation is reduced if a sub-expression gets evaluated into a temporary.
Indeed, in certain cases, an intermediate result is sufficiently costly to compute and is reused sufficiently many times that it is worth "caching". Here is an example:
\code matrix1 = matrix2 * (matrix3 + matrix4); \endcode
\code mat1 = mat2 * (mat3 + mat4);
\endcode
Here, provided the matrices have at least 2 rows and 2 columns, each coefficienct of the expression <tt>matrix3 + matrix4</tt> is going to be used several times in the matrix product. Instead of computing the sum everytime, it is much better to compute it once and store it in a temporary variable. Eigen understands this and evaluates <tt>matrix3 + matrix4</tt> into a temporary variable before evaluating the product.
Here, provided the matrices have at least 2 rows and 2 columns, each coefficient of the expression <tt>mat3 + mat4</tt> is going to be used several times in the matrix product. Instead of computing the sum every time, it is much better to compute it once and store it in a temporary variable. %Eigen understands this and evaluates <tt>mat3 + mat4</tt> into a temporary variable before evaluating the product.
*/

View File

@ -49,6 +49,7 @@ int main(int argc, char** argv)
In case your application is parallelized with OpenMP, you might want to disable Eigen's own parallelization as detailed in the previous section.
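A sketch of that setting:
\code
#include <Eigen/Core>

int main()
{
  Eigen::setNbThreads(1); // let the application's OpenMP layer own all parallelism
  // ... OpenMP-parallel code calling Eigen ...
}
\endcode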
\warning Using OpenMP with custom scalar types that might throw exceptions can lead to unexpected behaviour in the event of throwing.
*/
}

View File

@ -232,8 +232,8 @@ On the other hand, since there exist 24 different conventions, they are pretty c
to create a rotation matrix according to the 2-1-2 convention.</td><td>\code
Matrix3f m;
m = AngleAxisf(angle1, Vector3f::UnitZ())
* AngleAxisf(angle2, Vector3f::UnitY())
* AngleAxisf(angle3, Vector3f::UnitZ());
  * AngleAxisf(angle2, Vector3f::UnitY())
  * AngleAxisf(angle3, Vector3f::UnitZ());
\endcode</td></tr>
</table>

View File

@ -5,6 +5,7 @@ function generate_autotoc() {
if(headers.length > 1) {
var toc = $("#side-nav").append('<div id="nav-toc" class="toc"><h3>Table of contents</h3></div>');
toc = $("#nav-toc");
var footer = $("#nav-path");
var footerHeight = footer.height();
toc = toc.append('<ul></ul>');
toc = toc.find('ul');
@ -137,7 +138,7 @@ function initNavTree(toroot,relpath)
}
})
$(window).load(showRoot);
$(window).on("load", showRoot);
}
// return false if the node has no children at all, or has only section/subsection children
@ -241,6 +242,6 @@ $(document).ready(function() {
}
})();
$(window).load(resizeHeight);
$(window).on("load", resizeHeight);
});

View File

@ -17,19 +17,6 @@ $generatedby &#160;<a href="http://www.doxygen.org/index.html">
</small></address>
<!--END !GENERATE_TREEVIEW-->
<!-- Piwik -->
<script type="text/javascript">
var pkBaseURL = (("https:" == document.location.protocol) ? "https://stats.sylphide-consulting.com/piwik/" : "http://stats.sylphide-consulting.com/piwik/");
document.write(unescape("%3Cscript src='" + pkBaseURL + "piwik.js' type='text/javascript'%3E%3C/script%3E"));
</script><script type="text/javascript">
try {
var piwikTracker = Piwik.getTracker(pkBaseURL + "piwik.php", 20);
piwikTracker.trackPageView();
piwikTracker.enableLinkTracking();
} catch( err ) {}
</script><noscript><p><img src="http://stats.sylphide-consulting.com/piwik/piwik.php?idsite=20" style="border:0" alt="" /></p></noscript>
<!-- End Piwik Tracking Code -->
</body>
</html>

View File

@ -20,6 +20,9 @@ $mathjax
</head>
<body>
<div style="background:#FFDDDD;font-size:120%;text-align:center;margin:0;padding:5px">Please, help us to better know about our user community by answering the following short survey: <a href="https://forms.gle/wpyrxWi18ox9Z5ae9">https://forms.gle/wpyrxWi18ox9Z5ae9</a></div>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<!--BEGIN TITLEAREA-->

View File

@ -14,5 +14,5 @@ int main()
a.block<2,2>(1,1) = m;
cout << "Here is now a with m copied into its central 2x2 block:" << endl << a << endl << endl;
a.block(0,0,2,3) = a.block(2,1,2,3);
cout << "Here is now a with bottom-right 2x3 block copied into top-left 2x2 block:" << endl << a << endl << endl;
cout << "Here is now a with bottom-right 2x3 block copied into top-left 2x3 block:" << endl << a << endl << endl;
}

View File

@ -103,9 +103,9 @@ endif()
add_dependencies(lapack eigen_lapack eigen_lapack_static)
install(TARGETS eigen_lapack eigen_lapack_static
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
@ -133,12 +133,14 @@ if(EXISTS ${eigen_full_path_to_testing_lapack})
string(REGEX REPLACE "(.*)/STACK:(.*) (.*)" "\\1/STACK:900000000000000000 \\3"
CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
endif()
file(MAKE_DIRECTORY "${LAPACK_BINARY_DIR}/TESTING")
add_subdirectory(testing/MATGEN)
add_subdirectory(testing/LIN)
add_subdirectory(testing/EIG)
cmake_policy(SET CMP0026 OLD)
macro(add_lapack_test output input target)
set(TEST_INPUT "${LAPACK_SOURCE_DIR}/testing/${input}")
set(TEST_OUTPUT "${LAPACK_BINARY_DIR}/testing/${output}")
set(TEST_OUTPUT "${LAPACK_BINARY_DIR}/TESTING/${output}")
get_target_property(TEST_LOC ${target} LOCATION)
string(REPLACE "." "_" input_name ${input})
set(testName "${target}_${input_name}")

View File

@ -163,7 +163,7 @@ ei_add_test(constructor)
ei_add_test(linearstructure)
ei_add_test(integer_types)
ei_add_test(unalignedcount)
if(NOT EIGEN_TEST_NO_EXCEPTIONS)
if(NOT EIGEN_TEST_NO_EXCEPTIONS AND NOT EIGEN_TEST_OPENMP)
ei_add_test(exceptions)
endif()
ei_add_test(redux)
@ -185,7 +185,7 @@ ei_add_test(smallvectors)
ei_add_test(mapped_matrix)
ei_add_test(mapstride)
ei_add_test(mapstaticmethods)
ei_add_test(array)
ei_add_test(array_cwise)
ei_add_test(array_for_matrix)
ei_add_test(array_replicate)
ei_add_test(array_reverse)

View File

@ -279,7 +279,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1);
VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1));
VERIFY_IS_APPROX(numext::abs2(real(m1)) + numext::abs2(imag(m1)), numext::abs2(m1));
VERIFY_IS_APPROX(numext::abs2(Eigen::real(m1)) + numext::abs2(Eigen::imag(m1)), numext::abs2(m1));
if(!NumTraits<Scalar>::IsComplex)
VERIFY_IS_APPROX(numext::real(m1), m1);
@ -368,7 +368,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
for (Index i = 0; i < m.rows(); ++i)
for (Index j = 0; j < m.cols(); ++j)
m3(i,j) = std::atan2(imag(m1(i,j)), real(m1(i,j)));
m3(i,j) = std::atan2(m1(i,j).imag(), m1(i,j).real());
VERIFY_IS_APPROX(arg(m1), m3);
std::complex<RealScalar> zero(0.0,0.0);
@ -395,7 +395,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
VERIFY_IS_APPROX(inverse(inverse(m1)),m1);
VERIFY_IS_APPROX(conj(m1.conjugate()), m1);
VERIFY_IS_APPROX(abs(m1), sqrt(square(real(m1))+square(imag(m1))));
VERIFY_IS_APPROX(abs(m1), sqrt(square(m1.real())+square(m1.imag())));
VERIFY_IS_APPROX(abs(m1), sqrt(abs2(m1)));
VERIFY_IS_APPROX(log10(m1), log(m1)/log(10));
@ -446,7 +446,7 @@ template<typename ArrayType> void min_max(const ArrayType& m)
}
void test_array()
void test_array_cwise()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( array(Array<float, 1, 1>()) );

View File

@ -28,9 +28,13 @@
template<typename MatrixType>
void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
{
MatrixType m = a;
if(pickrandom)
MatrixType m;
if(pickrandom) {
m.resizeLike(a);
svd_fill_random(m);
}
else
m = a;
CALL_SUBTEST(( svd_test_all_computation_options<BDCSVD<MatrixType> >(m, false) ));
}

View File

@ -20,6 +20,8 @@ template<typename MatrixType> struct Wrapper
inline operator MatrixType& () { return m_mat; }
};
enum my_sizes { M = 12, N = 7};
template<typename MatrixType> void ctor_init1(const MatrixType& m)
{
// Check logic in PlainObjectBase::_init1
@ -81,4 +83,16 @@ void test_constructor()
Array<float,3,3> a(123);
VERIFY_IS_EQUAL(a(4), 123.f);
}
{
MatrixXi m1(M,N);
VERIFY_IS_EQUAL(m1.rows(),M);
VERIFY_IS_EQUAL(m1.cols(),N);
ArrayXXi a1(M,N);
VERIFY_IS_EQUAL(a1.rows(),M);
VERIFY_IS_EQUAL(a1.cols(),N);
VectorXi v1(M);
VERIFY_IS_EQUAL(v1.size(),M);
ArrayXi a2(M);
VERIFY_IS_EQUAL(a2.size(),M);
}
}

View File

@ -8,7 +8,7 @@ struct Foo
static Index object_limit;
int dummy;
Foo()
Foo() : dummy(0)
{
#ifdef EIGEN_EXCEPTIONS
// TODO: Is this the correct way to handle this?
@ -37,22 +37,33 @@ void test_ctorleak()
{
typedef Matrix<Foo, Dynamic, Dynamic> MatrixX;
typedef Matrix<Foo, Dynamic, 1> VectorX;
Foo::object_count = 0;
for(int i = 0; i < g_repeat; i++) {
Index rows = internal::random<Index>(2,EIGEN_TEST_MAX_SIZE), cols = internal::random<Index>(2,EIGEN_TEST_MAX_SIZE);
Foo::object_limit = internal::random<Index>(0, rows*cols - 2);
Foo::object_limit = rows*cols;
{
MatrixX r(rows, cols);
Foo::object_limit = r.size()+internal::random<Index>(0, rows*cols - 2);
std::cout << "object_limit =" << Foo::object_limit << std::endl;
#ifdef EIGEN_EXCEPTIONS
try
{
#endif
std::cout << "\nMatrixX m(" << rows << ", " << cols << ");\n";
MatrixX m(rows, cols);
if(internal::random<bool>()) {
std::cout << "\nMatrixX m(" << rows << ", " << cols << ");\n";
MatrixX m(rows, cols);
}
else {
std::cout << "\nMatrixX m(r);\n";
MatrixX m(r);
}
#ifdef EIGEN_EXCEPTIONS
VERIFY(false); // not reached if exceptions are enabled
}
catch (const Foo::Fail&) { /* ignore */ }
#endif
}
VERIFY_IS_EQUAL(Index(0), Foo::object_count);
{
@ -66,4 +77,5 @@ void test_ctorleak()
}
VERIFY_IS_EQUAL(Index(0), Foo::object_count);
}
std::cout << "\n";
}

View File

@ -67,7 +67,7 @@ template<typename MatrixType> void eigensolver(const MatrixType& m)
// Test matrix with NaN
a(0,0) = std::numeric_limits<typename MatrixType::RealScalar>::quiet_NaN();
EigenSolver<MatrixType> eiNaN(a);
VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence);
VERIFY_IS_NOT_EQUAL(eiNaN.info(), Success);
}
// regression test for bug 1098

View File

@ -109,5 +109,7 @@ void memoryleak()
void test_exceptions()
{
CALL_SUBTEST( memoryleak() );
EIGEN_TRY {
CALL_SUBTEST( memoryleak() );
} EIGEN_CATCH(...) {}
}

View File

@ -43,11 +43,11 @@ void check_inf_nan(bool dryrun) {
}
else
{
VERIFY( !(numext::isfinite)(m(3)) );
VERIFY( !(numext::isinf)(m(3)) );
VERIFY( (numext::isnan)(m(3)) );
VERIFY( !m.allFinite() );
VERIFY( m.hasNaN() );
if( (std::isfinite)(m(3))) g_test_level=1; VERIFY( !(numext::isfinite)(m(3)) ); g_test_level=0;
if( (std::isinf) (m(3))) g_test_level=1; VERIFY( !(numext::isinf)(m(3)) ); g_test_level=0;
if(!(std::isnan) (m(3))) g_test_level=1; VERIFY( (numext::isnan)(m(3)) ); g_test_level=0;
if( (std::isfinite)(m(3))) g_test_level=1; VERIFY( !m.allFinite() ); g_test_level=0;
if(!(std::isnan) (m(3))) g_test_level=1; VERIFY( m.hasNaN() ); g_test_level=0;
}
T hidden_zero = (std::numeric_limits<T>::min)()*(std::numeric_limits<T>::min)();
m(4) /= hidden_zero;
@ -62,29 +62,29 @@ void check_inf_nan(bool dryrun) {
}
else
{
VERIFY( !(numext::isfinite)(m(4)) );
VERIFY( (numext::isinf)(m(4)) );
VERIFY( !(numext::isnan)(m(4)) );
VERIFY( !m.allFinite() );
VERIFY( m.hasNaN() );
if( (std::isfinite)(m(4))) g_test_level=1; VERIFY( !(numext::isfinite)(m(4)) ); g_test_level=0;
if(!(std::isinf) (m(4))) g_test_level=1; VERIFY( (numext::isinf)(m(4)) ); g_test_level=0;
if( (std::isnan) (m(4))) g_test_level=1; VERIFY( !(numext::isnan)(m(4)) ); g_test_level=0;
if( (std::isfinite)(m(3))) g_test_level=1; VERIFY( !m.allFinite() ); g_test_level=0;
if(!(std::isnan) (m(3))) g_test_level=1; VERIFY( m.hasNaN() ); g_test_level=0;
}
m(3) = 0;
if(dryrun)
{
std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),true); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), true); std::cout << "\n";
std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n";
std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n";
std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n";
std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n";
std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n";
std::cout << "hasNaN: "; check(m.hasNaN(), 0); std::cout << "\n";
std::cout << "\n\n";
}
else
{
VERIFY( (numext::isfinite)(m(3)) );
VERIFY( !(numext::isinf)(m(3)) );
VERIFY( !(numext::isnan)(m(3)) );
VERIFY( !m.allFinite() );
VERIFY( !m.hasNaN() );
if(!(std::isfinite)(m(3))) g_test_level=1; VERIFY( (numext::isfinite)(m(3)) ); g_test_level=0;
if( (std::isinf) (m(3))) g_test_level=1; VERIFY( !(numext::isinf)(m(3)) ); g_test_level=0;
if( (std::isnan) (m(3))) g_test_level=1; VERIFY( !(numext::isnan)(m(3)) ); g_test_level=0;
if( (std::isfinite)(m(3))) g_test_level=1; VERIFY( !m.allFinite() ); g_test_level=0;
if( (std::isnan) (m(3))) g_test_level=1; VERIFY( !m.hasNaN() ); g_test_level=0;
}
}

View File

@ -15,8 +15,9 @@
#include<iostream>
using namespace std;
// TODO not sure if this is actually still necessary anywhere ...
template<typename T> EIGEN_DONT_INLINE
void kill_extra_precision(T& x) { eigen_assert((void*)(&x) != (void*)0); }
void kill_extra_precision(T& ) { }
template<typename BoxType> void alignedbox(const BoxType& _box)

Some files were not shown because too many files have changed in this diff.