Upgrade to Eigen 3.3.6
parent d56033b5a5
commit a2f7389518
CMakeLists.txt

@@ -41,10 +41,13 @@ string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_
 set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}")
 set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})

-# if the mercurial program is absent, this will leave the EIGEN_HG_CHANGESET string empty,
-# but won't stop CMake.
-execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT)
-execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT)
+# if we are not in a mercurial clone
+if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.hg)
+  # if the mercurial program is absent or this will leave the EIGEN_HG_CHANGESET string empty,
+  # but won't stop CMake.
+  execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT)
+  execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT)
+endif()

 # if this is the default (aka development) branch, extract the mercurial changeset number from the hg tip output...
 if(EIGEN_BRANCH_OUTPUT MATCHES "default")

@@ -64,6 +67,33 @@ include(GNUInstallDirs)
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

+option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
+
+macro(ei_add_cxx_compiler_flag FLAG)
+  string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
+  string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
+  check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
+  if(COMPILER_SUPPORT_${SFLAG})
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+  endif()
+endmacro(ei_add_cxx_compiler_flag)
+
+check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
+
+if(EIGEN_TEST_CXX11)
+  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_EXTENSIONS OFF)
+  if(EIGEN_COMPILER_SUPPORT_CPP11)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+  endif()
+else()
+  #set(CMAKE_CXX_STANDARD 03)
+  #set(CMAKE_CXX_EXTENSIONS OFF)
+  ei_add_cxx_compiler_flag("-std=c++03")
+endif()
+
 #############################################################################
 # find how to link to the standard libraries                                #
 #############################################################################
@@ -115,15 +145,6 @@ endif()
 set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")

-macro(ei_add_cxx_compiler_flag FLAG)
-  string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
-  string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
-  check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
-  if(COMPILER_SUPPORT_${SFLAG})
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
-  endif()
-endmacro(ei_add_cxx_compiler_flag)
-
 if(NOT MSVC)
   # We assume that other compilers are partly compatible with GNUCC

@@ -359,8 +380,6 @@ if(EIGEN_TEST_NO_EXCEPTIONS)
   message(STATUS "Disabling exceptions in tests/examples")
 endif()

-option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
-
 set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")

 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

@@ -416,16 +435,15 @@ add_subdirectory(Eigen)
 add_subdirectory(doc EXCLUDE_FROM_ALL)

-include(EigenConfigureTesting)
-
-# fixme, not sure this line is still needed:
-enable_testing() # must be called from the root CMakeLists, see man page
-
-if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-  add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
-else()
-  add_subdirectory(test EXCLUDE_FROM_ALL)
+option(BUILD_TESTING "Enable creation of Eigen tests." ON)
+if(BUILD_TESTING)
+  include(EigenConfigureTesting)
+
+  # fixme, not sure this line is still needed:
+  enable_testing() # must be called from the root CMakeLists, see man page
+
+  if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+    add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
+  else()
+    add_subdirectory(test EXCLUDE_FROM_ALL)
+  endif()
 endif()

 if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)

@@ -461,7 +479,9 @@ endif(NOT WIN32)
 configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)

-ei_testing_print_summary()
+if(BUILD_TESTING)
+  ei_testing_print_summary()
+endif()

 message(STATUS "")
 message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}")
CTestConfig.cmake

@@ -4,10 +4,10 @@
 ## # The following are required to uses Dart and the Cdash dashboard
 ## ENABLE_TESTING()
 ## INCLUDE(CTest)
-set(CTEST_PROJECT_NAME "Eigen3.3")
+set(CTEST_PROJECT_NAME "Eigen 3.3")
 set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")

 set(CTEST_DROP_METHOD "http")
 set(CTEST_DROP_SITE "manao.inria.fr")
-set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3")
+set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen+3.3")
 set(CTEST_DROP_SITE_CDASH TRUE)
CTestCustom.cmake.in

@@ -1,3 +1,4 @@

 set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "2000")
 set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "2000")
+list(APPEND CTEST_CUSTOM_ERROR_EXCEPTION @EIGEN_CTEST_ERROR_EXCEPTION@)
Eigen/Cholesky

@@ -9,6 +9,7 @@
 #define EIGEN_CHOLESKY_MODULE_H

 #include "Core"
+#include "Jacobi"

 #include "src/Core/util/DisableStupidWarnings.h"

@@ -31,7 +32,11 @@
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/Cholesky/LLT_LAPACKE.h"
 #endif
Eigen/Core

@@ -14,6 +14,22 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"

+#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
+#define EIGEN_CUDACC __CUDACC__
+#endif
+
+#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
+#define EIGEN_CUDA_ARCH __CUDA_ARCH__
+#endif
+
+#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
+#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
+#elif defined(__CUDACC_VER__)
+#define EIGEN_CUDACC_VER __CUDACC_VER__
+#else
+#define EIGEN_CUDACC_VER 0
+#endif
+
 // Handle NVCC/CUDA/SYCL
 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
   // Do not try asserts on CUDA and SYCL!
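
Note on the hunk above: EIGEN_CUDACC_VER folds NVCC's split version macros into one comparable integer, major*10000 + minor*100. A minimal host-side sketch of that encoding (the sample versions are illustrative, not taken from the diff):

    // Mirrors the EIGEN_CUDACC_VER encoding introduced above; hypothetical inputs.
    constexpr int cudacc_ver(int major, int minor) { return major * 10000 + minor * 100; }
    static_assert(cudacc_ver(9, 1) == 90100, "CUDA 9.1 encodes as 90100");
    static_assert(cudacc_ver(7, 5) == 70500, "70500 is the FP16 threshold checked later in this file");
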
@@ -37,9 +53,9 @@
   #endif

   #define EIGEN_DEVICE_FUNC __host__ __device__
-  // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
+  // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
   // works properly on the device side
-  #include <math_functions.hpp>
+  #include <cuda_runtime.h>
 #else
   #define EIGEN_DEVICE_FUNC
 #endif

@@ -155,6 +171,9 @@
     #ifdef __AVX512DQ__
       #define EIGEN_VECTORIZE_AVX512DQ
     #endif
+    #ifdef __AVX512ER__
+      #define EIGEN_VECTORIZE_AVX512ER
+    #endif
   #endif

 // include files

@@ -229,7 +248,7 @@
 #if defined __CUDACC__
   #define EIGEN_VECTORIZE_CUDA
   #include <vector_types.h>
-  #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+  #if EIGEN_CUDACC_VER >= 70500
     #define EIGEN_HAS_CUDA_FP16
   #endif
 #endif

@@ -352,6 +371,7 @@ using std::ptrdiff_t;
 #include "src/Core/MathFunctions.h"
 #include "src/Core/GenericPacketMath.h"
 #include "src/Core/MathFunctionsImpl.h"
+#include "src/Core/arch/Default/ConjHelper.h"

 #if defined EIGEN_VECTORIZE_AVX512
   #include "src/Core/arch/SSE/PacketMath.h"

@@ -367,6 +387,7 @@ using std::ptrdiff_t;
   #include "src/Core/arch/AVX/MathFunctions.h"
   #include "src/Core/arch/AVX/Complex.h"
   #include "src/Core/arch/AVX/TypeCasting.h"
+  #include "src/Core/arch/SSE/TypeCasting.h"
 #elif defined EIGEN_VECTORIZE_SSE
   #include "src/Core/arch/SSE/PacketMath.h"
   #include "src/Core/arch/SSE/MathFunctions.h"
Eigen/Eigenvalues

@@ -45,7 +45,11 @@
 #include "src/Eigenvalues/GeneralizedEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/Eigenvalues/RealSchur_LAPACKE.h"
 #include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
 #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
Eigen/LU

@@ -28,7 +28,11 @@
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/LU/PartialPivLU_LAPACKE.h"
 #endif
 #include "src/LU/Determinant.h"
Eigen/QR

@@ -36,7 +36,11 @@
 #include "src/QR/ColPivHouseholderQR.h"
 #include "src/QR/CompleteOrthogonalDecomposition.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/QR/HouseholderQR_LAPACKE.h"
 #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
 #endif
Eigen/QtAlignedMalloc

@@ -27,7 +27,7 @@ void qFree(void *ptr)
 void *qRealloc(void *ptr, std::size_t size)
 {
   void* newPtr = Eigen::internal::aligned_malloc(size);
-  memcpy(newPtr, ptr, size);
+  std::memcpy(newPtr, ptr, size);
   Eigen::internal::aligned_free(ptr);
   return newPtr;
 }
Eigen/SVD

@@ -37,7 +37,11 @@
 #include "src/SVD/JacobiSVD.h"
 #include "src/SVD/BDCSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/SVD/JacobiSVD_LAPACKE.h"
 #endif
Eigen/src/Cholesky/LDLT.h

@@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     /** \brief Reports whether previous computation was successful.
       *
       * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
+      *          \c NumericalIssue if the factorization failed because of a zero pivot.
       */
     ComputationInfo info() const
     {
@@ -305,7 +305,8 @@ template<> struct ldlt_inplace<Lower>
   if (size <= 1)
   {
     transpositions.setIdentity();
-    if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
+    if(size==0) sign = ZeroSign;
+    else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
     else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
     else sign = ZeroSign;
     return true;
@@ -376,6 +377,8 @@ template<> struct ldlt_inplace<Lower>
       if((rs>0) && pivot_is_valid)
         A21 /= realAkk;
+      else if(rs>0)
+        ret = ret && (A21.array()==Scalar(0)).all();

       if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
       else if(!pivot_is_valid) found_zero_pivot = true;
@@ -568,13 +571,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
   // more precisely, use pseudo-inverse of D (see bug 241)
   using std::abs;
   const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
-  // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
-  // as motivated by LAPACK's xGELSS:
+  // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
+  // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
   // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
   // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
   // diagonal element is not well justified and leads to numerical issues in some cases.
   // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
-  RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
+  // Using numeric_limits::min() gives us more robustness to denormals.
+  RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();

   for (Index i = 0; i < vecD.size(); ++i)
   {
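
Net effect of the hunk above: a diagonal entry of D is now inverted whenever its magnitude exceeds numeric_limits<RealScalar>::min(), which is more robust to denormals than the old 1/highest threshold. A small usage sketch against the public LDLT API (the matrix values are made up):

    #include <Eigen/Cholesky>
    #include <iostream>

    int main() {
      Eigen::Matrix2d A;
      A << 1, 1,
           1, 1;                       // positive semidefinite, rank 1
      Eigen::Vector2d b(2, 2);         // b lies in the column space of A
      Eigen::LDLT<Eigen::Matrix2d> ldlt(A);
      Eigen::Vector2d x = ldlt.solve(b);       // internally applies the pseudo-inverse of D
      std::cout << (A * x - b).norm() << "\n"; // residual should be ~0
    }
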
Eigen/src/Cholesky/LLT.h

@@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
   *
   * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
   * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
-  *             The other triangular part won't be read.
+  *               The other triangular part won't be read.
   *
   * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
   * matrix A such that A = LL^* = U^*U, where L is lower triangular.
@@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
   * Example: \include LLT_example.cpp
   * Output: \verbinclude LLT_example.out
   *
+  * \b Performance: for best performance, it is recommended to use a column-major storage format
+  * with the Lower triangular part (the default), or, equivalently, a row-major storage format
+  * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
+  * step, and rank-updates can be up to 3 times slower.
+  *
   * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
   *
+  * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
+  * Therefore, the strict lower part does not have to store correct values.
+  *
   * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
   */
-/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
- * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
- * the strict lower part does not have to store correct values.
- */
 template<typename _MatrixType, int _UpLo> class LLT
 {
   public:
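
The newly documented performance advice is easy to follow from user code: column-major storage (Eigen's default) paired with the Lower triangular part is the fast path. A sketch with invented sizes:

    #include <Eigen/Dense>

    int main() {
      const int n = 100;
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(n, n);
      Eigen::MatrixXd S = A * A.transpose() + n * Eigen::MatrixXd::Identity(n, n); // SPD by construction
      Eigen::LLT<Eigen::MatrixXd, Eigen::Lower> llt(S);  // column-major + Lower: the recommended pairing
      Eigen::VectorXd x = llt.solve(Eigen::VectorXd::Ones(n));
      return llt.info() == Eigen::Success ? 0 : 1;
    }
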
@@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
     }

     template<typename Derived>
-    void solveInPlace(MatrixBase<Derived> &bAndX) const;
+    void solveInPlace(const MatrixBase<Derived> &bAndX) const;

     template<typename InputType>
     LLT& compute(const EigenBase<InputType>& matrix);
@@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
     /** \brief Reports whether previous computation was successful.
       *
       * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
+      *          \c NumericalIssue if the matrix.appears not to be positive definite.
       */
     ComputationInfo info() const
     {
@@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
   eigen_assert(a.rows()==a.cols());
   const Index size = a.rows();
   m_matrix.resize(size, size);
-  m_matrix = a.derived();
+  if (!internal::is_same_dense(m_matrix, a.derived()))
+    m_matrix = a.derived();

   // Compute matrix L1 norm = max abs column sum.
   m_l1_norm = RealScalar(0);
@@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
   *
   * This version avoids a copy when the right hand side matrix b is not needed anymore.
   *
+  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+  * This function will const_cast it, so constness isn't honored here.
+  *
   * \sa LLT::solve(), MatrixBase::llt()
   */
 template<typename MatrixType, int _UpLo>
 template<typename Derived>
-void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
+void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
 {
   eigen_assert(m_isInitialized && "LLT is not initialized.");
   eigen_assert(m_matrix.rows()==bAndX.rows());
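
Taking the right-hand side as const MatrixBase<Derived>& follows Eigen's usual idiom for writable expression arguments: the const is a formality that lets temporaries such as block expressions bind, and is cast away internally, as the new warning states. A sketch of what this enables (made-up data):

    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d S = 4.0 * Eigen::Matrix3d::Identity();  // trivially SPD
      Eigen::LLT<Eigen::Matrix3d> llt(S);
      Eigen::MatrixXd B = Eigen::MatrixXd::Ones(3, 2);
      // B.col(0) is a temporary Block expression: it could not bind to the old
      // non-const reference parameter, but binds to the new const one.
      llt.solveInPlace(B.col(0));
      return 0;
    }
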
Eigen/src/Core/Array.h

@@ -153,8 +153,6 @@ class Array
       : Base(std::move(other))
     {
       Base::_check_template_params();
-      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-        Base::_set_noalias(other);
     }
     EIGEN_DEVICE_FUNC
     Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
Eigen/src/Core/Assign_MKL.h

@@ -84,7 +84,8 @@ class vml_assign_traits
   struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
                     Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
     typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
+      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
@@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
                     Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
     typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
                     const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
+      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
Eigen/src/Core/ConditionEstimator.h

@@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco
 {
   typedef typename Decomposition::RealScalar RealScalar;
   eigen_assert(dec.rows() == dec.cols());
-  if (dec.rows() == 0)              return RealScalar(1);
+  if (dec.rows() == 0)              return NumTraits<RealScalar>::infinity();
   if (matrix_norm == RealScalar(0)) return RealScalar(0);
   if (dec.rows() == 1)              return RealScalar(1);
   const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
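
With this change an empty (0x0) decomposition reports a reciprocal condition number of infinity, i.e. "as well conditioned as possible", rather than 1. A sketch of how it surfaces through the public API (assuming rcond() on the decomposition, as in Eigen 3.3):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd empty(0, 0);
      Eigen::FullPivLU<Eigen::MatrixXd> lu(empty);
      std::cout << lu.rcond() << "\n";  // inf after this change, 1 before
    }
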
Eigen/src/Core/CoreEvaluators.h

@@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
     OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
                              ? int(outer_stride_at_compile_time<ArgType>::ret)
                              : int(inner_stride_at_compile_time<ArgType>::ret),
-    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
+    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,

     FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
     FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
     Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,

     PacketAlignment = unpacket_traits<PacketScalar>::alignment,
-    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
+                             && (OuterStrideAtCompileTime!=0)
+                             && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
     Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
   };
   typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
@@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
     : m_argImpl(block.nestedExpression()),
       m_startRow(block.startRow()),
-      m_startCol(block.startCol())
+      m_startCol(block.startCol()),
+      m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
   { }

   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;

   enum {
-    RowsAtCompileTime = XprType::RowsAtCompileTime
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
+    ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
   };

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+    if (ForwardLinearAccess)
+      return m_argImpl.coeff(m_linear_offset.value() + index);
+    else
+      return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
-    return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+    if (ForwardLinearAccess)
+      return m_argImpl.coeffRef(m_linear_offset.value() + index);
+    else
+      return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }

   template<int LoadMode, typename PacketType>
@@ -1063,8 +1073,11 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
-    return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
-                                       RowsAtCompileTime == 1 ? index : 0);
+    if (ForwardLinearAccess)
+      return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
+    else
+      return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+                                         RowsAtCompileTime == 1 ? index : 0);
   }

   template<int StoreMode, typename PacketType>
@@ -1078,15 +1091,19 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
-    return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
-                                             RowsAtCompileTime == 1 ? index : 0,
-                                             x);
+    if (ForwardLinearAccess)
+      return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
+    else
+      return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+                                               RowsAtCompileTime == 1 ? index : 0,
+                                               x);
   }

 protected:
   evaluator<ArgType> m_argImpl;
   const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
   const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+  const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
 };

 // TODO: This evaluator does not actually use the child evaluator;
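
The ForwardLinearAccess fast path above forwards linear indices of an inner-panel block straight to the underlying expression, shifted by a precomputed offset (for a column-major argument: startCol() * rows(), since an inner panel spans whole columns). A sketch of the invariant it relies on:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd m(4, 5);
      for (int i = 0; i < m.size(); ++i) m(i) = i;   // fill by linear (column-major) index
      // middleCols is an inner panel of a column-major matrix: whole, contiguous columns,
      // so block(index) == m(offset + index) with offset = startCol * rows() = 2 * 4 = 8.
      std::cout << m.middleCols(2, 2)(0) << " == " << m(8) << "\n";   // both print 8
    }
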
Eigen/src/Core/Diagonal.h

@@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
     EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)

     EIGEN_DEVICE_FUNC
-    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
+    {
+      eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
+    }

     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
Eigen/src/Core/Dot.h

@@ -31,7 +31,8 @@ struct dot_nocheck
   typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
   typedef typename conj_prod::result_type ResScalar;
   EIGEN_DEVICE_FUNC
-  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  EIGEN_STRONG_INLINE
+  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
   {
     return a.template binaryExpr<conj_prod>(b).sum();
   }
@@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
   typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
   typedef typename conj_prod::result_type ResScalar;
   EIGEN_DEVICE_FUNC
-  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  EIGEN_STRONG_INLINE
+  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
   {
     return a.transpose().template binaryExpr<conj_prod>(b).sum();
   }
@@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true>
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE
 typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
@@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
   * \sa lpNorm(), dot(), squaredNorm()
   */
 template<typename Derived>
-inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
   return numext::sqrt(squaredNorm());
 }
@@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
   * \sa norm(), normalize()
   */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
   typedef typename internal::nested_eval<Derived,2>::type _Nested;
@@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const
   * \sa norm(), normalized()
   */
 template<typename Derived>
-inline void MatrixBase<Derived>::normalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
 {
   RealScalar z = squaredNorm();
   // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
   * \sa stableNorm(), stableNormalize(), normalized()
   */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::stableNormalized() const
 {
   typedef typename internal::nested_eval<Derived,3>::type _Nested;
@@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
   * \sa stableNorm(), stableNormalized(), normalize()
   */
 template<typename Derived>
-inline void MatrixBase<Derived>::stableNormalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
 {
   RealScalar w = cwiseAbs().maxCoeff();
   RealScalar z = (derived()/w).squaredNorm();
Eigen/src/Core/GeneralProduct.h

@@ -24,12 +24,17 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;

 template<int Size, int MaxSize> struct product_size_category
 {
-  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
-                    (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
-         value = is_large  ? Large
-               : Size == 1 ? 1
-                           : Small
+  enum {
+    #ifndef EIGEN_CUDA_ARCH
+    is_large = MaxSize == Dynamic ||
+               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+    #else
+    is_large = 0,
+    #endif
+    value = is_large  ? Large
+          : Size == 1 ? 1
+                      : Small
   };
 };
@@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
   *
   * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
   */
-#ifndef __CUDACC__
-
 template<typename Derived>
 template<typename OtherDerived>
 inline const Product<Derived, OtherDerived>
@@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
   return Product<Derived, OtherDerived>(derived(), other.derived());
 }

-#endif // __CUDACC__
-
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
   *
   * The returned product will behave like any other expressions: the coefficients of the product will be
Eigen/src/Core/Map.h

@@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
 {
   typedef traits<PlainObjectType> TraitsBase;
   enum {
+    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
+                             ? PlainObjectType::ColsAtCompileTime
+                             : PlainObjectType::RowsAtCompileTime,
+
     InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                              ? int(PlainObjectType::InnerStrideAtCompileTime)
                              : int(StrideType::InnerStrideAtCompileTime),
     OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
-                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
+                               ? Dynamic
+                               : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
                              : int(StrideType::OuterStrideAtCompileTime),
     Alignment = int(MapOptions)&int(AlignedMask),
     Flags0 = TraitsBase::Flags & (~NestByRefBit),
@@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
     EIGEN_DEVICE_FUNC
     inline Index outerStride() const
     {
-      return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
-           : IsVectorAtCompileTime ? this->size()
-           : int(Flags)&RowMajorBit ? this->cols()
-           : this->rows();
+      return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
+           : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
+           : IsVectorAtCompileTime ? (this->size() * innerStride())
+           : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
+           : (this->rows() * innerStride());
     }

     /** Constructor in the fixed-size case.
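
The outerStride() fix matters when a Map has a non-default inner stride but no explicit outer stride: the outer stride must then be measured in scalars including the inner gaps. A small illustration (invented data):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      double data[12];
      for (int i = 0; i < 12; ++i) data[i] = i;
      // 3x2 column-major map reading every other double: entries of a column are
      // 2 apart, so a full column occupies rows()*innerStride() = 3*2 = 6 doubles.
      Eigen::Map<Eigen::MatrixXd, 0, Eigen::InnerStride<2> > m(data, 3, 2);
      std::cout << m.outerStride() << "\n";  // 6 with the corrected formula (was 3)
    }
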
Eigen/src/Core/MapBase.h

@@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     enum {
       RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
       ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
       SizeAtCompileTime = Base::SizeAtCompileTime
     };
@@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
     {
 #if EIGEN_MAX_ALIGN_BYTES>0
+      // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
+      const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+      EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
       eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
-                   || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+                   || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
 #endif
     }
Eigen/src/Core/MathFunctions.h

@@ -348,31 +348,7 @@ struct norm1_retval
 * Implementation of hypot                                                *
 ****************************************************************************/

-template<typename Scalar>
-struct hypot_impl
-{
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  static inline RealScalar run(const Scalar& x, const Scalar& y)
-  {
-    EIGEN_USING_STD_MATH(abs);
-    EIGEN_USING_STD_MATH(sqrt);
-    RealScalar _x = abs(x);
-    RealScalar _y = abs(y);
-    Scalar p, qp;
-    if(_x>_y)
-    {
-      p = _x;
-      qp = _y / p;
-    }
-    else
-    {
-      p = _y;
-      qp = _x / p;
-    }
-    if(p==RealScalar(0)) return RealScalar(0);
-    return p * sqrt(RealScalar(1) + qp*qp);
-  }
-};
+template<typename Scalar> struct hypot_impl;

 template<typename Scalar>
 struct hypot_retval
@@ -495,7 +471,7 @@ namespace std_fallback {
     typedef typename NumTraits<Scalar>::Real RealScalar;
     EIGEN_USING_STD_MATH(log);
     Scalar x1p = RealScalar(1) + x;
-    return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+    return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
   }
 }
@@ -641,20 +617,27 @@ struct random_default_impl<Scalar, false, true>
 {
   static inline Scalar run(const Scalar& x, const Scalar& y)
   {
-    typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
-    if(y<x)
+    if (y <= x)
       return x;
-    // the following difference might overflow on a 32 bits system,
-    // but since y>=x the result converted to an unsigned long is still correct.
-    std::size_t range = ScalarX(y)-ScalarX(x);
-    std::size_t offset = 0;
-    // rejection sampling
-    std::size_t divisor = 1;
-    std::size_t multiplier = 1;
-    if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
-    else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
+    // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
+    typedef typename make_unsigned<Scalar>::type ScalarU;
+    // ScalarX is the widest of ScalarU and unsigned int.
+    // We'll deal only with ScalarX and unsigned int below thus avoiding signed
+    // types and arithmetic and signed overflows (which are undefined behavior).
+    typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;
+    // The following difference doesn't overflow, provided our integer types are two's
+    // complement and have the same number of padding bits in signed and unsigned variants.
+    // This is the case in most modern implementations of C++.
+    ScalarX range = ScalarX(y) - ScalarX(x);
+    ScalarX offset = 0;
+    ScalarX divisor = 1;
+    ScalarX multiplier = 1;
+    const unsigned rand_max = RAND_MAX;
+    if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
+    else multiplier = 1 + range / (rand_max + 1);
+    // Rejection sampling.
     do {
-      offset = (std::size_t(std::rand()) * multiplier) / divisor;
+      offset = (unsigned(std::rand()) * multiplier) / divisor;
     } while (offset > range);
     return Scalar(ScalarX(x) + offset);
   }
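
The rewritten generator draws uniformly from [x, y] by bucketing std::rand() and rejecting draws that land past the range, all in unsigned arithmetic to avoid signed-overflow UB. The same idea in isolation, for plain unsigned values (a sketch, not Eigen's exact code):

    #include <cstdlib>

    // Uniform draw from [x, y] with y >= x, by rejection sampling over std::rand().
    unsigned draw_uniform(unsigned x, unsigned y) {
      unsigned range = y - x;
      unsigned divisor = 1, multiplier = 1;
      const unsigned rand_max = RAND_MAX;
      if (range <= rand_max) divisor = (rand_max + 1) / (range + 1); // several rand values per bucket
      else multiplier = 1 + range / (rand_max + 1);                  // rand alone cannot cover the range
      unsigned offset;
      do {
        offset = (unsigned(std::rand()) * multiplier) / divisor;
      } while (offset > range);  // reject overshoots so every value stays equally likely
      return x + offset;
    }
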
@@ -1030,7 +1013,8 @@ inline int log2(int x)

 /** \returns the square root of \a x.
   *
-  * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+  * It is essentially equivalent to
+  * \code using std::sqrt; return sqrt(x); \endcode
   * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
   * specializations when SSE is enabled.
   *
Eigen/src/Core/MathFunctionsImpl.h

@@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
   return pdiv(p, q);
 }

+template<typename RealScalar>
+EIGEN_STRONG_INLINE
+RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  RealScalar p, qp;
+  p = numext::maxi(x,y);
+  if(p==RealScalar(0)) return RealScalar(0);
+  qp = numext::mini(y,x) / p;
+  return p * sqrt(RealScalar(1) + qp*qp);
+}
+
+template<typename Scalar>
+struct hypot_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static inline RealScalar run(const Scalar& x, const Scalar& y)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return positive_real_hypot<RealScalar>(abs(x), abs(y));
+  }
+};
+
 } // end namespace internal

 } // end namespace Eigen
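
positive_real_hypot keeps the classic overflow-free formulation: with p = max(x,y) and q = min(x,y), hypot = p * sqrt(1 + (q/p)^2), so nothing larger than p is ever squared. A standalone check of the trick (values invented):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Same scaling idea as positive_real_hypot above, for non-negative doubles.
    double safe_hypot(double x, double y) {
      double p = std::max(x, y);
      if (p == 0.0) return 0.0;
      double qp = std::min(x, y) / p;          // qp <= 1, so qp*qp cannot overflow
      return p * std::sqrt(1.0 + qp * qp);
    }

    int main() {
      double big = 1e200;                      // naive sqrt(x*x + y*y) would overflow to inf
      std::printf("%g\n", safe_hypot(big, big));  // ~1.41421e+200
      return 0;
    }
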
Eigen/src/Core/Matrix.h

@@ -274,8 +274,6 @@ class Matrix
       : Base(std::move(other))
     {
       Base::_check_template_params();
-      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-        Base::_set_noalias(other);
     }
     EIGEN_DEVICE_FUNC
     Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
Eigen/src/Core/MatrixBase.h

@@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const MatrixBase<OtherDerived>& other);

-#ifdef __CUDACC__
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    const Product<Derived,OtherDerived,LazyProduct>
-    operator*(const MatrixBase<OtherDerived> &other) const
-    { return this->lazyProduct(other); }
-#else
-
-    template<typename OtherDerived>
     const Product<Derived,OtherDerived>
     operator*(const MatrixBase<OtherDerived> &other) const;

-#endif
-
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     const Product<Derived,OtherDerived,LazyProduct>
@@ -453,16 +444,24 @@ template<typename Derived> class MatrixBase
 ///////// MatrixFunctions module /////////

     typedef typename internal::stem_function<Scalar>::type StemFunction;
-    const MatrixExponentialReturnValue<Derived> exp() const;
+#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \
+    /** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
+    const ReturnType<Derived> Name() const;
+#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \
+    /** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
+    const ReturnType<Derived> Name(Argument) const;
+
+    EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential)
+    /** \brief Helper function for the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>.*/
     const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
-    const MatrixFunctionReturnValue<Derived> cosh() const;
-    const MatrixFunctionReturnValue<Derived> sinh() const;
-    const MatrixFunctionReturnValue<Derived> cos() const;
-    const MatrixFunctionReturnValue<Derived> sin() const;
-    const MatrixSquareRootReturnValue<Derived> sqrt() const;
-    const MatrixLogarithmReturnValue<Derived> log() const;
-    const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
-    const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
+    EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
+    EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
+    EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p)
+    EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)

 protected:
     EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
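
These macro-generated declarations still resolve in the unsupported MatrixFunctions module; only the documentation and prototypes are generated differently, so call sites are unchanged, e.g.:

    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main() {
      Eigen::Matrix2d A;
      A << 0, 1,
          -1, 0;                      // generator of a rotation
      std::cout << A.exp() << "\n";   // matrix exponential: rotation by 1 radian
    }
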
Eigen/src/Core/PlainObjectBase.h

@@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
      * \a data pointers.
      *
+     * Here is an example using strides:
+     * \include Matrix_Map_stride.cpp
+     * Output: \verbinclude Matrix_Map_stride.out
+     *
      * \see class Map
      */
     //@{
Eigen/src/Core/Product.h

@@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
           && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
     }

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }

     EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
     EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
@@ -127,7 +127,7 @@ public:
     using Base::derived;
     typedef typename Base::Scalar Scalar;

-    operator const Scalar() const
+    EIGEN_STRONG_INLINE operator const Scalar() const
     {
       return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
     }
@@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>

   public:

-    EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
     {
       EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
       eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
       return internal::evaluator<Derived>(derived()).coeff(row,col);
     }

-    EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
     {
       EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
       eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
Eigen/src/Core/ProductEvaluators.h

@@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
   typedef Product<Lhs, Rhs, Options> XprType;
   typedef product_evaluator<XprType> Base;

-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };

 // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                                const Product<Lhs, Rhs, DefaultProduct> > XprType;
   typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;

-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
     : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
   {}
 };
@@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
   typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
   typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;

-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
     : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
         Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
         xpr.index() ))
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
 {
   template<typename Dst>
-  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
   }

   template<typename Dst>
-  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
   }

   template<typename Dst>
-  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
 };
@@ -312,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
   };

   template<typename Dst>
-  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
   }

   template<typename Dst>
-  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
   }

   template<typename Dst>
-  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
   }

   template<typename Dst>
-  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
   }
@@ -785,7 +785,11 @@ public:
     _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
     _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
     Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
-    Alignment = evaluator<MatrixType>::Alignment
+    Alignment = evaluator<MatrixType>::Alignment,
+
+    AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
+                   || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
+                   || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
   };

   diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@@ -797,7 +801,10 @@ public:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
   {
-    return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+    if(AsScalarProduct)
+      return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
+    else
+      return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
   }

 protected:
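
AsScalarProduct covers the degenerate case where the diagonal factor has exactly one coefficient, so the product is really a scalar multiple; with linear indexing the old code would read diagonal coefficient idx instead of 0. For intuition (made-up sizes):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::VectorXd d(1);
      d << 3.0;                              // a 1x1 "diagonal matrix"
      Eigen::RowVectorXd v(4);
      v << 1, 2, 3, 4;
      // (1x1 diagonal) * (1x4 row vector): every coefficient is d(0) * v(i),
      // which is exactly the new AsScalarProduct branch.
      std::cout << (d.asDiagonal() * v) << "\n";  // 3 6 9 12
    }
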
Eigen/src/Core/Redux.h

@@ -407,7 +407,7 @@ protected:
   */
 template<typename Derived>
 template<typename Func>
-typename internal::traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::redux(const Func& func) const
 {
   eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
Eigen/src/Core/Ref.h

@@ -95,6 +95,8 @@ protected:
     template<typename Expression>
     EIGEN_DEVICE_FUNC void construct(Expression& expr)
     {
+      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
+
       if(PlainObjectType::RowsAtCompileTime==1)
       {
         eigen_assert(expr.rows()==1 || expr.cols()==1);
Eigen/src/Core/SelfAdjointView.h

@@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
     EIGEN_DEVICE_FUNC
     explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
-    {}
+    {
+      EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
+    }

     EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_matrix.rows(); }
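
The new static assertion turns a silent misuse into a compile error: only plain Lower or Upper are meaningful template arguments here. Sketch:

    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d m = Eigen::Matrix3d::Random();
      m.selfadjointView<Eigen::Lower>();             // fine
      // m.selfadjointView<Eigen::StrictlyLower>();  // now fails to compile with
      // SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY
      return 0;
    }
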
@@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
                                    TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
     }

-    typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+    typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
     /** \sa MatrixBase::conjugate() const */
     EIGEN_DEVICE_FUNC
     inline const ConjugateReturnType conjugate() const
Eigen/src/Core/SelfCwiseBinaryOp.h

@@ -17,7 +17,6 @@ namespace Eigen {
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(co
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(co
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(co
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
   return derived();
 }
Eigen/src/Core/SolveTriangular.h

@@ -169,6 +169,9 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
   OtherDerived& other = _other.const_cast_derived();
   eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
   eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
+  // If solving for a 0x0 matrix, nothing to do, simply return.
+  if (derived().cols() == 0)
+    return;

   enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit)  && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
   typedef typename internal::conditional<copy,
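
With the early return, solving against an empty triangular system becomes a well-defined no-op instead of falling through to the general code path. Sketch (trivial sizes):

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd A(0, 0), b(0, 1);
      A.triangularView<Eigen::Lower>().solveInPlace(b);  // now simply returns
      return 0;
    }
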
Eigen/src/Core/StableNorm.h

@@ -165,7 +165,7 @@ MatrixBase<Derived>::stableNorm() const
   typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
   typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
-  DerivedCopy copy(derived());
+  const DerivedCopy copy(derived());

   enum {
     CanAlign = (   (int(DerivedCopyClean::Flags)&DirectAccessBit)
Eigen/src/Core/Transpositions.h

@@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
   const Product<OtherDerived, Transpose, AliasFreeProduct>
   operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
   {
-    return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
+    return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
   }

 /** \returns the \a matrix with the inverse transpositions applied to the rows.
Eigen/src/Core/arch/AVX/Complex.h

@@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
   }
 };

-template<> struct conj_helper<Packet8f, Packet4cf, false,false>
-{
-  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
-  { return padd(c, pmul(x,y)); }
-
-  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
-  { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
-};
-
-template<> struct conj_helper<Packet4cf, Packet8f, false,false>
-{
-  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
-  { return padd(c, pmul(x,y)); }
-
-  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
-  { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)

 template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
 {
@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
|||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
|
||||
{ return padd(c, pmul(x,y)); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
|
||||
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
|
||||
};
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -159,11 +159,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co

#ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
  // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
  // and gcc stupidly generates a vfmadd132ps instruction,
  // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
  // the result of the product.
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
  // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
  // and even register spilling with clang>=6.0 (bug 1637).
  // Gcc stupidly generates a vfmadd132ps instruction.
  // So let's enforce it to generate a vfmadd231ps instruction since the most common use
  // case is to accumulate the result of the product.
  Packet8f res = c;
  __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
  return res;

@ -172,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
  // see above
  Packet4d res = c;
  __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
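The same constraint trick in isolation, as a minimal self-contained sketch (assumes an FMA-capable x86-64 compiler; not part of the patch): the "+x" constraint ties the accumulator to the destination register, which forces the accumulate form of the instruction.

#include <immintrin.h>

// res starts as the accumulator c; making it both input and output ("+x")
// leaves the compiler no choice but vfmadd231ps (dst += a*b), avoiding the
// extra vmovaps moves the comment above complains about.
static inline __m256 fma_accumulate(__m256 a, __m256 b, __m256 c) {
  __m256 res = c;
  __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x"(res) : [a] "x"(a), [b] "x"(b));
  return res;
}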
@ -308,9 +309,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
}

#ifndef EIGEN_VECTORIZE_AVX512
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif

template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
@ -333,9 +334,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
  __m256d tmp = _mm256_shuffle_pd(a,a,5);
  return _mm256_permute2f128_pd(tmp, tmp, 1);

  #if 0
  // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
  // exhibit the same latency/throughput, but it is here for future reference/benchmarking...
  __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
  return _mm256_permute_pd(swap_halves,5);
  #endif
}

// pabs should be ok
@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
  //   x = x + x - 1.0;
  // } else { x = x - 1.0; }
  __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
  Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
  x = psub(x, p16f_1);
  e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
  x = padd(x, tmp);

  Packet16f x2 = pmul(x, x);
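The operand swaps in these AVX-512 hunks all follow from the blend convention: _mm512_mask_blend_ps(k, a, b) yields b[i] where bit i of k is set, else a[i]. A tiny standalone check (illustrative, requires AVX-512F):

#include <immintrin.h>

__m512 blend_demo() {
  __m512 a = _mm512_set1_ps(1.0f);
  __m512 b = _mm512_set1_ps(2.0f);
  __mmask16 k = 0x00FF;                // low 8 lanes take b, high 8 keep a
  return _mm512_mask_blend_ps(k, a, b);
}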
@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
  x = padd(x, y2);

  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
  return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
                              _mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
  return _mm512_mask_blend_ps(iszero_mask,
                              _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
                              p16f_minus_inf);
}
#endif
@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
                                     _mm512_setzero_ps());
  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));

  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));

@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
                                    _mm512_setzero_pd());
  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));

  // Do a first step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));

@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
                                     _mm512_rsqrt14_ps(_x));
  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());

  // Fill in NaNs and Infs for the negative/zero entries.
  __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
  Packet16f infs_and_nans = _mm512_mask_blend_ps(
      neg_mask, p16f_nan,
      _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
      neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);

  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));

  // Insert NaNs and Infs in all the right places.
  return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
  return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
}

template <>

@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
                                    _mm512_rsqrt14_pd(_x));
  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());

  // Fill in NaNs and Infs for the negative/zero entries.
  __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
  Packet8d infs_and_nans = _mm512_mask_blend_pd(
      neg_mask, p8d_nan,
      _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
      neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);

  // Do a first step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));

@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));

  // Insert NaNs and Infs in all the right places.
  return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
  return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
}
#else
#elif defined(EIGEN_VECTORIZE_AVX512ER)
template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
  return _mm512_rsqrt28_ps(x);
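For reference, the Newton-Raphson refinement these psqrt/prsqrt kernels apply, written out in scalar form (a sketch only; rsqrt_approx stands in for _mm512_rsqrt14_ps, which delivers roughly 14 bits of accuracy):

#include <cmath>

// One Newton step for y ~ 1/sqrt(a): y' = y * (1.5 - 0.5 * a * y * y).
// Each step roughly doubles the number of correct bits, which is why a
// single step suffices for float and a first step is taken for double.
float refined_rsqrt(float a, float rsqrt_approx) {
  float y = rsqrt_approx;
  return y * (1.5f - 0.5f * a * y * y);
}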
@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
  pstore(to, pa);
}

template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
@ -648,13 +648,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
{
  // _mm512_abs_ps intrinsic not found, so hack around it
  return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
  return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
}
template <>
EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
  // _mm512_abs_ps intrinsic not found, so hack around it
  return (__m512d)_mm512_and_si512((__m512i)a,
                                   _mm512_set1_epi64(0x7fffffffffffffff));
  return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
                                              _mm512_set1_epi64(0x7fffffffffffffff)));
}

#ifdef EIGEN_VECTORIZE_AVX512DQ
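The underlying bit trick, in scalar form (a sketch, not part of the patch; the cast intrinsics adopted above are free reinterpretations and emit no instructions): clearing the sign bit of an IEEE-754 float yields its absolute value.

#include <cstdint>
#include <cstring>

float abs_via_mask(float a) {
  std::uint32_t bits;
  std::memcpy(&bits, &a, sizeof bits);   // reinterpret, like _mm512_castps_si512
  bits &= 0x7fffffffu;                   // clear the IEEE-754 sign bit
  std::memcpy(&a, &bits, sizeof a);
  return a;
}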
@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
};

template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
  { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
};

template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
  { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{

@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
    return pconj(internal::pmul(a, b));
  }
};

template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
  { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
};

template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
  { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }

template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  #ifdef __VSX__
  Packet4f ret;
  __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
  #else
  return vec_min(a, b);
  #endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }

template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  #ifdef __VSX__
  Packet4f ret;
  __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
  #else
  return vec_max(a, b);
  #endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }

template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
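Reading the new VSX asm: xvcmpgesp builds a per-lane mask for a >= b, and xxsel picks the second operand where the mask is set. A scalar sketch of the intended per-lane behaviour (the NaN remark is an inference from the instruction semantics, not stated in the patch):

// pmin lane: mask = (a >= b); result = mask ? b : a. When either input is
// NaN the compare is false, so the first operand passes through unchanged.
float pmin_lane(float a, float b) { return (a >= b) ? b : a; }

// pmax builds its mask with xvcmpgtsp as (b > a) and uses the same select:
float pmax_lane(float a, float b) { return (b > a) ? b : a; }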
@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
// for some weird reasons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }

template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
{
  Packet2d ret;
  __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
}

template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
{
  Packet2d ret;
  __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
}

template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {

template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
  Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
  Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
  Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
  return vec_sel(elsePacket, thenPacket, mask);
}
#endif // __VSX__
@ -29,7 +29,7 @@
// type Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in fp32 for CPUs, except for simple parameter conversions, I/O
// in float32_bits for CPUs, except for simple parameter conversions, I/O
// to disk and the likes), but fast on GPUs.
@ -50,38 +50,45 @@ struct half;

namespace half_impl {

#if !defined(EIGEN_HAS_CUDA_FP16)

// Make our own __half definition that is similar to CUDA's.
struct __half {
  EIGEN_DEVICE_FUNC __half() {}
  explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
// Make our own __half_raw definition that is similar to CUDA's.
struct __half_raw {
  EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
  explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
  unsigned short x;
};

#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
typedef __half __half_raw;
#endif

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);

struct half_base : public __half {
struct half_base : public __half_raw {
  EIGEN_DEVICE_FUNC half_base() {}
  EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {}
  EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {}
  EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
  EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
  EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
#endif
};

} // namespace half_impl

// Class definition.
struct half : public half_impl::half_base {
#if !defined(EIGEN_HAS_CUDA_FP16)
  typedef half_impl::__half __half;
#if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
  typedef half_impl::__half_raw __half_raw;
#endif

  EIGEN_DEVICE_FUNC half() {}

  EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
  EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
  EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
  EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
#endif

  explicit EIGEN_DEVICE_FUNC half(bool b)
      : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
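A small illustration (not from the patch) of the raw-bits escape hatch this hunk reworks; 0x3c00 is the half-precision encoding of 1.0, exactly what the bool constructor above relies on:

#include <Eigen/Core>

int main() {
  // Build halves directly from their 16-bit IEEE-754 encodings.
  Eigen::half one  = Eigen::half_impl::raw_uint16_to_half(0x3c00);  // 1.0
  Eigen::half zero = Eigen::half_impl::raw_uint16_to_half(0x0000);  // 0.0
  return (float(one) == 1.0f && float(zero) == 0.0f) ? 0 : 1;
}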
@ -138,71 +145,125 @@ struct half : public half_impl::half_base {
  }
};

} // end namespace Eigen

namespace std {
template<>
struct numeric_limits<Eigen::half> {
  static const bool is_specialized = true;
  static const bool is_signed = true;
  static const bool is_integer = false;
  static const bool is_exact = false;
  static const bool has_infinity = true;
  static const bool has_quiet_NaN = true;
  static const bool has_signaling_NaN = true;
  static const float_denorm_style has_denorm = denorm_present;
  static const bool has_denorm_loss = false;
  static const std::float_round_style round_style = std::round_to_nearest;
  static const bool is_iec559 = false;
  static const bool is_bounded = false;
  static const bool is_modulo = false;
  static const int digits = 11;
  static const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
  static const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
  static const int radix = 2;
  static const int min_exponent = -13;
  static const int min_exponent10 = -4;
  static const int max_exponent = 16;
  static const int max_exponent10 = 4;
  static const bool traps = true;
  static const bool tinyness_before = false;

  static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
  static Eigen::half round_error() { return Eigen::half(0.5); }
  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
};

// If std::numeric_limits<T> is specialized, should also specialize
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
// std::numeric_limits<const volatile T>
// https://stackoverflow.com/a/16519653/
template<>
struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
template<>
struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
template<>
struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
} // end namespace std

namespace Eigen {

namespace half_impl {

#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530

// Intrinsics for native fp16 support. Note that on current hardware,
// these are no faster than fp32 arithmetic (you need to use the half2
// these are no faster than float32_bits arithmetic (you need to use the half2
// versions to get the ALU speed increased), but you do save the
// conversion steps back and forth.

__device__ half operator + (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
  return __hadd(a, b);
}
__device__ half operator * (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
  return __hmul(a, b);
}
__device__ half operator - (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
  return __hsub(a, b);
}
__device__ half operator / (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
  float num = __half2float(a);
  float denom = __half2float(b);
  return __float2half(num / denom);
}
__device__ half operator - (const half& a) {
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
  return __hneg(a);
}
__device__ half& operator += (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
  a = a + b;
  return a;
}
__device__ half& operator *= (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
  a = a * b;
  return a;
}
__device__ half& operator -= (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
  a = a - b;
  return a;
}
__device__ half& operator /= (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
  a = a / b;
  return a;
}
__device__ bool operator == (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
  return __heq(a, b);
}
__device__ bool operator != (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
  return __hne(a, b);
}
__device__ bool operator < (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
  return __hlt(a, b);
}
__device__ bool operator <= (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
  return __hle(a, b);
}
__device__ bool operator > (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
  return __hgt(a, b);
}
__device__ bool operator >= (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
  return __hge(a, b);
}

#else // Emulate support for half floats

// Definitions for CPUs and older CUDA, mostly working through conversion
// to/from fp32.
// to/from float32_bits.

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
  return half(float(a) + float(b));
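For illustration (not part of the patch), the relocated and extended specialization makes generic numeric code work with Eigen::half out of the box:

#include <Eigen/Core>
#include <limits>
#include <iostream>

int main() {
  using limits = std::numeric_limits<Eigen::half>;
  std::cout << "max    = " << float((limits::max)()) << "\n"  // 65504 (raw 0x7bff)
            << "digits = " << limits::digits << "\n";         // 11, as declared above
  return 0;
}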
@ -238,10 +299,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
  return a;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
  return float(a) == float(b);
  return numext::equal_strict(float(a),float(b));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
  return float(a) != float(b);
  return numext::not_equal_strict(float(a), float(b));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
  return float(a) < float(b);
@ -269,34 +330,35 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
// these in hardware. If we need more performance on older/other CPUs, they are
// also possible to vectorize directly.

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
  __half h;
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
  __half_raw h;
  h.x = x;
  return h;
}

union FP32 {
union float32_bits {
  unsigned int u;
  float f;
};

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  return __float2half(ff);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
  __half tmp_ff = __float2half(ff);
  return *(__half_raw*)&tmp_ff;

#elif defined(EIGEN_HAS_FP16_C)
  __half h;
  __half_raw h;
  h.x = _cvtss_sh(ff, 0);
  return h;

#else
  FP32 f; f.f = ff;
  float32_bits f; f.f = ff;

  const FP32 f32infty = { 255 << 23 };
  const FP32 f16max = { (127 + 16) << 23 };
  const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
  const float32_bits f32infty = { 255 << 23 };
  const float32_bits f16max = { (127 + 16) << 23 };
  const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
  unsigned int sign_mask = 0x80000000u;
  __half o;
  __half_raw o;
  o.x = static_cast<unsigned short>(0x0u);

  unsigned int sign = f.u & sign_mask;

@ -335,17 +397,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#endif
}

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
  return __half2float(h);

#elif defined(EIGEN_HAS_FP16_C)
  return _cvtsh_ss(h.x);

#else
  const FP32 magic = { 113 << 23 };
  const float32_bits magic = { 113 << 23 };
  const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
  FP32 o;
  float32_bits o;

  o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
  unsigned int exp = shifted_exp & o.u; // just the exponent
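A stripped-down sketch of the float32_bits punning idiom the converters rely on (the rename from FP32 is purely cosmetic; the union lets integer bit manipulation see a float's encoding, exactly as the source does):

// Minimal illustration of the union-punning used by float_to_half_rtne and
// half_to_float above: write through .f, read the raw encoding through .u.
union float32_bits_demo {
  unsigned int u;
  float f;
};

unsigned int exponent_field(float ff) {
  float32_bits_demo f;
  f.f = ff;
  return (f.u >> 23) & 0xff;  // 8-bit biased exponent of an IEEE-754 float
}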
@ -370,7 +432,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
  return (a.x & 0x7fff) == 0x7c00;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return __hisnan(a);
#else
  return (a.x & 0x7fff) > 0x7c00;
@ -386,11 +448,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
  return result;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
  return half(::expf(float(a)));
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
  return half(hexp(a));
#else
  return half(::expf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return Eigen::half(::hlog(a));
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return half(::hlog(a));
#else
  return half(::logf(float(a)));
#endif
@ -402,7 +468,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
  return half(::log10f(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
  return half(::sqrtf(float(a)));
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
  return half(hsqrt(a));
#else
  return half(::sqrtf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
  return half(::powf(float(a), float(b)));
@ -420,14 +490,22 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
  return half(::tanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
  return half(hfloor(a));
#else
  return half(::floorf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
  return half(hceil(a));
#else
  return half(::ceilf(float(a)));
#endif
}

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return __hlt(b, a) ? b : a;
#else
  const float f1 = static_cast<float>(a);
@ -436,7 +514,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return __hlt(a, b) ? b : a;
#else
  const float f1 = static_cast<float>(a);
@ -474,49 +552,6 @@ template<> struct is_arithmetic<half> { enum { value = true }; };

} // end namespace internal

} // end namespace Eigen

namespace std {
template<>
struct numeric_limits<Eigen::half> {
  static const bool is_specialized = true;
  static const bool is_signed = true;
  static const bool is_integer = false;
  static const bool is_exact = false;
  static const bool has_infinity = true;
  static const bool has_quiet_NaN = true;
  static const bool has_signaling_NaN = true;
  static const float_denorm_style has_denorm = denorm_present;
  static const bool has_denorm_loss = false;
  static const std::float_round_style round_style = std::round_to_nearest;
  static const bool is_iec559 = false;
  static const bool is_bounded = false;
  static const bool is_modulo = false;
  static const int digits = 11;
  static const int digits10 = 2;
  //static const int max_digits10 = ;
  static const int radix = 2;
  static const int min_exponent = -13;
  static const int min_exponent10 = -4;
  static const int max_exponent = 16;
  static const int max_exponent10 = 4;
  static const bool traps = true;
  static const bool tinyness_before = false;

  static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
  static Eigen::half round_error() { return Eigen::half(0.5); }
  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
};
}

namespace Eigen {

template<> struct NumTraits<Eigen::half>
  : GenericNumTraits<Eigen::half>
{
@ -557,7 +592,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
  return Eigen::half(::expf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return Eigen::half(::hlog(a));
#else
  return Eigen::half(::logf(float(a)));
@ -591,14 +626,18 @@ struct hash<Eigen::half> {


// Add the missing shfl_xor intrinsic
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
  #if EIGEN_CUDACC_VER < 90000
  return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
  #else
  return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
  #endif
}
#endif

// ldg() has an overload for __half, but we also need one for Eigen::half.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
  return Eigen::half_impl::raw_uint16_to_half(
      __ldg(reinterpret_cast<const unsigned short*>(ptr)));
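For context, a sketch (illustrative CUDA 9+ device code, not part of the patch) of the warp butterfly reduction that xor-shuffles like the one above are typically used for; warp_sum is a hypothetical helper, not an Eigen function:

// After log2(32) = 5 xor-shuffles every lane of a full warp holds the
// warp-wide sum. Uses the _sync variant, as the hunk does for CUDA >= 9.0.
__device__ float warp_sum(float v) {
  for (int mask = 16; mask > 0; mask >>= 1)
    v += __shfl_xor_sync(0xFFFFFFFF, v, mask, 32);
  return v;
}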
@ -606,7 +645,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr)
#endif


#if defined(__CUDA_ARCH__)
#if defined(EIGEN_CUDA_ARCH)
namespace Eigen {
namespace numext {
@ -99,7 +99,8 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2&

template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
  half2 result;
  result.x = a.x & 0x7FFF7FFF;
  unsigned temp = *(reinterpret_cast<const unsigned*>(&(a)));
  *(reinterpret_cast<unsigned*>(&(result))) = temp & 0x7FFF7FFF;
  return result;
}
@ -275,7 +276,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
  return __floats2half2_rn(r1, r2);
}

#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530

template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
@ -0,0 +1,29 @@

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_ARCH_CONJ_HELPER_H

#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
    { return padd(c, pmul(x,y)); } \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
  }; \
 \
  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
    { return padd(c, pmul(x,y)); } \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
  };

#endif // EIGEN_ARCH_CONJ_HELPER_H
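To make the refactoring concrete, this is (a sketch of) what one invocation such as EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f) expands to, i.e. the pair of hand-written specializations the earlier per-architecture hunks delete:

// Expansion sketch for the AVX pair (Packet4cf, Packet8f):
template<> struct conj_helper<Packet8f, Packet4cf, false,false> {
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
  { return Packet4cf(Eigen::internal::pmul<Packet8f>(x, y.v)); }
};
// ...plus the mirrored conj_helper<Packet4cf, Packet8f, false,false>.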
@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  float32x2_t r64;
  r64 = vld1_f32((float *)&from);
  r64 = vld1_f32((const float *)&from);

  return Packet2cf(vcombine_f32(r64, r64));
}
@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
}

template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }

template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
};

EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
  s = vmulq_f32(b.v, b.v);
  rev_s = vrev64q_f32(s);

  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
}

EIGEN_DEVICE_FUNC inline void
@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }

template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }

template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
  }
};

EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for NEON
@ -36,12 +36,43 @@ namespace internal {
#endif
#endif

#if EIGEN_COMP_MSVC

// In MSVC's arm_neon.h header file, all NEON vector types
// are aliases to the same underlying type __n128.
// We thus have to wrap them to make them different C++ types.
// (See also bug 1428)

template<typename T,int unique_id>
struct eigen_packet_wrapper
{
  operator T&() { return m_val; }
  operator const T&() const { return m_val; }
  eigen_packet_wrapper() {}
  eigen_packet_wrapper(const T &v) : m_val(v) {}
  eigen_packet_wrapper& operator=(const T &v) {
    m_val = v;
    return *this;
  }

  T m_val;
};
typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
typedef eigen_packet_wrapper<int32x4_t  ,2> Packet4i;
typedef eigen_packet_wrapper<int32x2_t  ,3> Packet2i;
typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;

#else

typedef float32x2_t Packet2f;
typedef float32x4_t Packet4f;
typedef int32x4_t   Packet4i;
typedef int32x2_t   Packet2i;
typedef uint32x4_t  Packet4ui;

#endif // EIGEN_COMP_MSVC

#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
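Why the unique_id parameter matters, in a minimal sketch (not part of the patch; n128 is a hypothetical stand-in for MSVC's __n128): if two typedefs name the same underlying type, the two overloads below collapse into one signature and the pair of definitions fails to compile. Distinct wrapper instantiations keep them separate.

// Distinct C++ types from one underlying type, via a tag parameter.
template<typename T, int unique_id>
struct packet_wrapper {
  operator T&() { return m_val; }
  operator const T&() const { return m_val; }
  T m_val;
};

struct n128 { unsigned long long lo, hi; };  // stand-in for MSVC's __n128
typedef packet_wrapper<n128, 0> Pack4f;      // two genuinely different types,
typedef packet_wrapper<n128, 1> Pack4i;      // so both overloads may coexist:

void load(Pack4f&) {}
void load(Pack4i&) {}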
@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
                  _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
}

template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
};

template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
};

template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{

@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }

template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{

@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
  }
};

template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
  { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
};

template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
  { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
@ -28,7 +28,7 @@ namespace internal {
#endif
#endif

#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
// have overloads for both types without linking error.
// One solution is to increase ABI version using -fabi-version=4 (or greater).
@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
}

#if EIGEN_COMP_PGI
typedef const void * SsePrefetchPtrType;
#else
typedef const char * SsePrefetchPtrType;
#endif

#ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif

#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
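The point of the typedef, in isolation (a sketch under the assumption the patch encodes, namely that PGI's header declares _mm_prefetch with a const void* argument while GCC/Clang/MSVC use const char*): the cast target becomes a per-compiler alias instead of hard-coding const char* at every call site.

#include <xmmintrin.h>

#if defined(__PGI)
typedef const void * PrefetchPtr;   // PGI variant (assumption per this patch)
#else
typedef const char * PrefetchPtr;   // GCC/Clang/MSVC signature
#endif

inline void prefetch_t0(const float* addr) {
  _mm_prefetch((PrefetchPtr)addr, _MM_HINT_T0);
}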
@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co

} // end namespace Eigen

#if EIGEN_COMP_PGI
// PGI++ does not define the following intrinsics in C++ mode.
static inline __m128  _mm_castpd_ps   (__m128d x) { return reinterpret_cast<__m128&>(x);  }
static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
static inline __m128d _mm_castps_pd   (__m128  x) { return reinterpret_cast<__m128d&>(x); }
static inline __m128i _mm_castps_si128(__m128  x) { return reinterpret_cast<__m128i&>(x); }
static inline __m128  _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x);  }
static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
#endif

#endif // EIGEN_PACKET_MATH_SSE_H
@ -14,6 +14,7 @@ namespace Eigen {

namespace internal {

#ifndef EIGEN_VECTORIZE_AVX
template <>
struct type_casting_traits<float, int> {
  enum {

@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
  };
};

template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
  return _mm_cvttps_epi32(a);
}


template <>
struct type_casting_traits<int, float> {
  enum {

@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
  };
};

template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
  return _mm_cvtepi32_ps(a);
}


template <>
struct type_casting_traits<double, float> {
  enum {

@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
  };
};

template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}

template <>
struct type_casting_traits<float, double> {
  enum {

@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
    TgtCoeffRatio = 2
  };
};
#endif

template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
  return _mm_cvttps_epi32(a);
}

template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
  return _mm_cvtepi32_ps(a);
}

template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}

template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
  // Simply discard the second half of the input
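A quick reminder of the conversion semantics these relocated specializations pin down (an illustrative scalar check, not part of the patch): _mm_cvttps_epi32 is the truncating variant, matching C's float-to-int cast.

#include <emmintrin.h>

int trunc_demo() {
  __m128 v  = _mm_set_ps(-1.9f, -1.1f, 1.9f, 1.1f);
  __m128i t = _mm_cvttps_epi32(v);   // truncation toward zero: 1, 1, -1, -1
  return _mm_cvtsi128_si32(t);       // lowest lane: (int)1.1f == 1
}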
@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
};

EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for AltiVec
@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh


/** \internal
  * \brief Template functor to compute the hypot of two scalars
  * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
  *
  * \sa MatrixBase::stableNorm(), class Redux
  */
@ -263,22 +263,15 @@ template<typename Scalar>
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
{
  EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
//   typedef typename NumTraits<Scalar>::Real result_type;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
  {
    EIGEN_USING_STD_MATH(sqrt)
    Scalar p, qp;
    if(_x>_y)
    {
      p = _x;
      qp = _y / p;
    }
    else
    {
      p = _y;
      qp = _x / p;
    }
    return p * sqrt(Scalar(1) + qp*qp);
    // This functor is used by hypotNorm only for which it is faster to first apply abs
    // on all coefficients prior to reduction through hypot.
    // This way we avoid calling abs on positive and real entries, and this also permits
    // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
    // through the same functor...
    return internal::positive_real_hypot(x,y);
  }
};
template<typename Scalar>
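For reference, the overflow-safe factoring at work here: hypot(x, y) = p * sqrt(1 + (q/p)^2) with p the larger and q the smaller input, which never squares a large value directly. A scalar sketch under the functor's new precondition that both inputs are positive reals; this is essentially the algorithm the deleted lines implemented, with a zero guard added here for the (0, 0) corner, and internal::positive_real_hypot is assumed to be equivalent:

#include <cmath>

// Assumes x >= 0 and y >= 0, as the reworded doc comment above now requires.
double positive_real_hypot_sketch(double x, double y) {
  double p  = (x > y) ? x : y;          // larger magnitude
  if (p == 0.0) return 0.0;             // avoid 0/0 below
  double qp = ((x > y) ? y : x) / p;    // ratio in [0, 1], cannot overflow
  return p * std::sqrt(1.0 + qp * qp);
}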
@@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
#endif

+#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
+// std::unary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::unary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };

+// std::binary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::binary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+#endif

#ifdef EIGEN_STDEXT_SUPPORT
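The new guard reflects that std::unary_negate and std::binary_negate are deprecated in C++17 and removed in C++20, so these functor_traits specializations are only compiled where the types still exist. On C++17 toolchains the standard replacement is std::not_fn; a minimal sketch:

#include <functional>

bool is_odd(int i) { return i % 2 != 0; }

int main() {
  // C++17: std::not_fn supersedes std::not1 / std::unary_negate.
  auto is_even = std::not_fn(is_odd);
  return is_even(2) ? 0 : 1; // returns 0
}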
@@ -1197,10 +1197,16 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
        EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
        RhsPacket B_0, B1, B2, B3, T0;

-#define EIGEN_GEBGP_ONESTEP(K) \
+        // NOTE: the begin/end asm comments below work around bug 935!
+        // but they are not enough for gcc>=6 without FMA (bug 1637)
+        #if EIGEN_GNUC_AT_LEAST(6,0)
+        #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+rm" (A0),[a1] "+rm" (A1));
+        #else
+        #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
+        #endif
+#define EIGEN_GEBGP_ONESTEP(K) \
            do { \
              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
              traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
              traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
              traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
@@ -1212,6 +1218,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
              traits.madd(A1, B2, C6, B2); \
              traits.madd(A0, B3, C3, T0); \
              traits.madd(A1, B3, C7, B3); \
+              EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \
              EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
            } while(false)

@@ -1526,10 +1533,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
          // The following piece of code wont work for 512 bit registers
          // Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
          // as nr (which is currently 4) for the return type.
-          typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
+          const int SResPacketHalfSize = unpacket_traits<typename unpacket_traits<SResPacket>::half>::size;
          if ((SwappedTraits::LhsProgress % 4) == 0 &&
              (SwappedTraits::LhsProgress <= 8) &&
-              (SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
+              (SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr))
          {
            SAccPacket C0, C1, C2, C3;
            straits.initAcc(C0);
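The workaround macro is an empty extended-asm statement: it emits no instructions, but the "+rm" constraints tell gcc that A0/A1 are read and written at that point, which blocks the rescheduling that made gcc>=6 spill the accumulators inside the 2pX4 micro kernel (bug 1637). A standalone sketch of the idiom, GCC/Clang only, with an illustrative loop:

// Two-accumulator dot product; the empty asm pins a0/a1 each iteration.
float dot2(const float* x, const float* y, int n) {
  float a0 = 0.f, a1 = 0.f;
  for (int i = 0; i + 1 < n; i += 2) {
    a0 += x[i]   * y[i];
    a1 += x[i+1] * y[i+1];
    __asm__("" : [a0] "+rm" (a0), [a1] "+rm" (a1)); // optimization barrier, no code emitted
  }
  return a0 + a1;
}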
@@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
    BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
    char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
    EIGTYPE beta(1); \
-    BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
+    BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
  } \
};
@@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
  } \
};

+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk)
+#else
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
+#endif

// TODO hanlde complex cases
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
@@ -46,7 +46,7 @@ namespace internal {

// gemm specialization

-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
template< \
  typename Index, \
  int LhsStorageOrder, bool ConjugateLhs, \
@@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
    ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
  } else b = _rhs; \
  \
-  BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}};

-GEMM_SPECIALIZATION(double, d, double, d)
-GEMM_SPECIALIZATION(float, f, float, s)
-GEMM_SPECIALIZATION(dcomplex, cd, double, z)
-GEMM_SPECIALIZATION(scomplex, cf, float, c)
+#ifdef EIGEN_USE_MKL
+GEMM_SPECIALIZATION(double, d, double, dgemm)
+GEMM_SPECIALIZATION(float, f, float, sgemm)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm)
+#else
+GEMM_SPECIALIZATION(double, d, double, dgemm_)
+GEMM_SPECIALIZATION(float, f, float, sgemm_)
+GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
+GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_)
+#endif

} // end namespase internal
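This is the pattern repeated across the gemv/symm/hemm/trmm/trsm specializations below: the macro used to token-paste a one-letter prefix onto a hard-coded "gemm_" suffix, which can only produce Fortran-style underscored symbols; taking the complete function name as an argument lets the same macro bind to MKL's non-underscored symbols (and MKL's complex types) as well. A compile-only sketch, where the stand-in declarations are illustrative rather than the real BLAS signatures:

// Stand-ins for the two symbol flavours (illustrative declarations only):
extern "C" void dgemm_(const char* transa); // reference Fortran BLAS
extern "C" void dgemm(const char* transa);  // MKL, no trailing underscore

#define CALL_GEMM_OLD(BLASPREFIX, t) BLASPREFIX##gemm_(t) // only <prefix>gemm_ reachable
#define CALL_GEMM_NEW(BLASFUNC, t)   BLASFUNC(t)          // any full name works

void demo() {
  CALL_GEMM_OLD(d, "N");      // expands to dgemm_("N")
  CALL_GEMM_NEW(dgemm, "N");  // MKL spelling
  CALL_GEMM_NEW(dgemm_, "N"); // Fortran BLAS spelling
}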
@@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)

-#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
@@ -113,14 +113,21 @@ static void run( \
    x_ptr=x_tmp.data(); \
    incx=1; \
  } else x_ptr=rhs; \
-  BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};

-EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
-EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
-EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
-EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
+#else
+EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
+#endif

} // end namespase internal
@@ -40,7 +40,7 @@ namespace internal {

/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */

-#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } else b = _rhs; \
    \
-    BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
    \
  } \
};

-#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } \
    \
-    BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
    \
  } \
};

-EIGEN_BLAS_SYMM_L(double, double, d, d)
-EIGEN_BLAS_SYMM_L(float, float, f, s)
-EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
+#endif

/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */

-#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } else b = _lhs; \
    \
-    BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
    \
  } \
};

-#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } \
    \
-    BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
  } \
};

-EIGEN_BLAS_SYMM_R(double, double, d, d)
-EIGEN_BLAS_SYMM_R(float, float, f, s)
-EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
+#endif
} // end namespace internal

} // end namespace Eigen
@@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
    x_tmp=map_x.conjugate(); \
    x_ptr=x_tmp.data(); \
  } else x_ptr=_rhs; \
-  BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};

+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
+#else
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
+#endif

} // end namespace internal
@@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

-    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+    // To work around an "error: member reference base type 'Matrix<...>
+    // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
+    // not a structure or union" compilation error in nvcc (tested V8.0.61),
+    // create a dummy internal::constructor_without_unaligned_array_assert
+    // object to pass to the Matrix constructor.
+    internal::constructor_without_unaligned_array_assert a;
+    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
    triangularBuffer.setZero();
    if((Mode&ZeroDiag)==ZeroDiag)
      triangularBuffer.diagonal().setZero();
@@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

-    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+    internal::constructor_without_unaligned_array_assert a;
+    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
    triangularBuffer.setZero();
    if((Mode&ZeroDiag)==ZeroDiag)
      triangularBuffer.diagonal().setZero();
@@ -393,7 +400,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{
  template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
  {
-    typedef typename Dest::Scalar Scalar;
+    typedef typename Lhs::Scalar LhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+    typedef typename Dest::Scalar Scalar;

    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
@@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

-    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
-                               * RhsBlasTraits::extractScalarFactor(a_rhs);
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
+    Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;

    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
        &dst.coeffRef(0,0), dst.outerStride(), // result info
        actualAlpha, blocking
      );

+    // Apply correction if the diagonal is unit and a scalar factor was nested:
+    if ((Mode&UnitDiag)==UnitDiag)
+    {
+      if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
+      {
+        Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+        dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
+      }
+      else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
+      {
+        Index diagSize = (std::min)(rhs.rows(),rhs.cols());
+        dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
+      }
+    }
  }
};
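The correction targets nested scalar factors such as (s*M).triangularView<UnitLower>() * B: blas_traits hoists s out of the product, so the kernel effectively computes s*(strict(M)+I)*B while the expression means (s*strict(M)+I)*B; the difference is exactly (s-1)*B on the rows covered by the diagonal, which is what gets subtracted. The same fix reappears in the matrix-vector selectors further down. A small self-check using only public Eigen API, with illustrative values:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix3d M = Eigen::Matrix3d::Random(), B = Eigen::Matrix3d::Random();
  double s = 2.0;
  // Factor s nested inside the triangular lhs of the product:
  Eigen::Matrix3d P = (s * M).triangularView<Eigen::UnitLower>() * B;
  // Reference: materialize the unit-lower view of s*M first, then multiply.
  Eigen::Matrix3d sM = s * M;
  Eigen::Matrix3d L = sM.triangularView<Eigen::UnitLower>();
  std::cout << (P - L * B).norm() << "\n"; // ~0 once the correction is applied
}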
@@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)

// implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
    } \
    /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
    /* call ?trmm*/ \
-    BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+    BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
    \
    /* Add op(a_triangular)*b into res*/ \
    Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
  } \
};

-EIGEN_BLAS_TRMM_L(double, double, d, d)
-EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMM_L(float, float, f, s)
-EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm)
+EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
+#endif

// implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
    } \
    /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
    /* call ?trmm*/ \
-    BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+    BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
    \
    /* Add op(a_triangular)*b into res*/ \
    Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
  } \
};

-EIGEN_BLAS_TRMM_R(double, double, d, d)
-EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMM_R(float, float, f, s)
-EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm)
+EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
+#endif
} // end namespace internal

} // end namespace Eigen
@@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);

-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
-                                  * RhsBlasTraits::extractScalarFactor(rhs);
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
+    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
      else
        dest = MappedDest(actualDestPtr, dest.size());
    }

+    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
+    {
+      Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
+    }
  }
};
@@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);

-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
-                                  * RhsBlasTraits::extractScalarFactor(rhs);
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
+    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;

    enum {
      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
@@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
        actualRhsPtr,1,
        dest.data(),dest.innerStride(),
        actualAlpha);

+    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
+    {
+      Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
+    }
  }
};
@@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)

// implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
  enum { \
@@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
      diag = IsUnitDiag ? 'U' : 'N'; \
      \
      /* call ?TRMV*/ \
-      BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+      BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
      \
      /* Add op(a_tr)rhs into res*/ \
-      BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+      BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
    /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
    if (size<(std::max)(rows,cols)) { \
      if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
        m = convert_index<BlasIndex>(size); \
        n = convert_index<BlasIndex>(cols-size); \
      } \
-      BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+      BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
    } \
  } \
};

-EIGEN_BLAS_TRMV_CM(double, double, d, d)
-EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMV_CM(float, float, f, s)
-EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_CM(double, double, d, d,)
+EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_CM(float, float, f, s,)
+EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
+#else
+EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
+EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
+EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
+EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
+#endif

// implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
  enum { \
@@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
      diag = IsUnitDiag ? 'U' : 'N'; \
      \
      /* call ?TRMV*/ \
-      BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+      BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
      \
      /* Add op(a_tr)rhs into res*/ \
-      BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+      BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
    /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
    if (size<(std::max)(rows,cols)) { \
      if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
        m = convert_index<BlasIndex>(size); \
        n = convert_index<BlasIndex>(cols-size); \
      } \
-      BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+      BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
    } \
  } \
};

-EIGEN_BLAS_TRMV_RM(double, double, d, d)
-EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMV_RM(float, float, f, s)
-EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_RM(double, double, d, d,)
+EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_RM(float, float, f, s,)
+EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
+#else
+EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
+EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
+EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
+EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
+#endif

} // end namespase internal
@@ -38,7 +38,7 @@ namespace Eigen {
namespace internal {

// implements LeftSide op(triangular)^-1 * general
-#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
    } \
    if (IsUnitDiag) diag='U'; \
    /* call ?trsm*/ \
-    BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+    BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
  } \
};

-EIGEN_BLAS_TRSM_L(double, double, d)
-EIGEN_BLAS_TRSM_L(dcomplex, double, z)
-EIGEN_BLAS_TRSM_L(float, float, s)
-EIGEN_BLAS_TRSM_L(scomplex, float, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_L(double, double, dtrsm)
+EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_L(float, float, strsm)
+EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
+#else
+EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
+EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_L(float, float, strsm_)
+EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
+#endif

// implements RightSide general * op(triangular)^-1
-#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
    } \
    if (IsUnitDiag) diag='U'; \
    /* call ?trsm*/ \
-    BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+    BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
    /*std::cout << "TRMS_L specialization!\n";*/ \
  } \
};

-EIGEN_BLAS_TRSM_R(double, double, d)
-EIGEN_BLAS_TRSM_R(dcomplex, double, z)
-EIGEN_BLAS_TRSM_R(float, float, s)
-EIGEN_BLAS_TRSM_R(scomplex, float, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_R(double, double, dtrsm)
+EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_R(float, float, strsm)
+EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
+#else
+EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
+EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_R(float, float, strsm_)
+EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
+#endif

} // end namespace internal
@@ -43,12 +43,20 @@
  #endif
  #pragma clang diagnostic ignored "-Wconstant-logical-operand"

-#elif defined __GNUC__ && __GNUC__>=6
+#elif defined __GNUC__

-  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+  #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
    #pragma GCC diagnostic push
  #endif
-  #pragma GCC diagnostic ignored "-Wignored-attributes"
+  // g++ warns about local variables shadowing member functions, which is too strict
+  #pragma GCC diagnostic ignored "-Wshadow"
+  #if __GNUC__ == 4 && __GNUC_MINOR__ < 8
+    // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
+    #pragma GCC diagnostic ignored "-Wtype-limits"
+  #endif
+  #if __GNUC__>=6
+    #pragma GCC diagnostic ignored "-Wignored-attributes"
+  #endif

#endif
@@ -49,10 +49,11 @@
  #define EIGEN_USE_LAPACKE
#endif

-#if defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
  #define EIGEN_USE_MKL
#endif


#if defined EIGEN_USE_MKL
#  include <mkl.h>
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@@ -108,6 +109,10 @@
  #endif
#endif

+#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
+#include "../../misc/blas.h"
+#endif

namespace Eigen {

typedef std::complex<double> dcomplex;
@@ -121,8 +126,5 @@ typedef int BlasIndex;

} // end namespace Eigen

-#if defined(EIGEN_USE_BLAS)
-#include "../../misc/blas.h"
-#endif

#endif // EIGEN_MKL_SUPPORT_H
@@ -13,7 +13,7 @@

#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3
-#define EIGEN_MINOR_VERSION 4
+#define EIGEN_MINOR_VERSION 6

#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -399,7 +399,7 @@
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
-  && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
    // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
    //    this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
@@ -413,7 +413,7 @@

#ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
  #define EIGEN_HAS_CONSTEXPR 1
#endif
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@@ -487,11 +487,13 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
+#ifndef EIGEN_STRONG_INLINE
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
+#endif

// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
@@ -812,7 +814,8 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY

-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
+  // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
    using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -986,7 +989,13 @@ namespace Eigen {
#  define EIGEN_NOEXCEPT
#  define EIGEN_NOEXCEPT_IF(x)
#  define EIGEN_NO_THROW throw()
-#  define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#  if EIGEN_COMP_MSVC
+    // MSVC does not support exception specifications (warning C4290),
+    // and they are deprecated in c++11 anyway.
+#    define EIGEN_EXCEPTION_SPEC(X) throw()
+#  else
+#    define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#  endif
#endif

#endif // EIGEN_MACROS_H
@@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
-    new int[huge];
+    ::operator new(huge);
  #endif
}
@@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
    IntPtr size = IntPtr(end)-IntPtr(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
-    memcpy(target, start, size);
+    std::memcpy(target, start, size);
  }
};
@@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
/** \class aligned_allocator
* \ingroup Core_Module
*
-* \brief STL compatible allocator to use with with 16 byte aligned types
+* \brief STL compatible allocator to use with types requiring a non standrad alignment.
+*
+* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
+* By default, it will thus provide at least 16 bytes alignment and more in following cases:
+* - 32 bytes alignment if AVX is enabled.
+* - 64 bytes alignment if AVX512 is enabled.
+*
+* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
+* \link TopicPreprocessorDirectivesPerformance there \endlink.
*
* Example:
* \code
@@ -739,7 +747,15 @@ public:
  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
    internal::check_size_for_overflow<T>(num);
-    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
+    size_type size = num * sizeof(T);
+#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
+    // workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+    // It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+    if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
+      return 0;
+    else
+#endif
+      return static_cast<pointer>( internal::aligned_malloc(size) );
  }

  void deallocate(pointer p, size_type /*num*/)
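For context, the documented use of this allocator is with STL containers of fixed-size vectorizable Eigen types; a minimal sketch using public Eigen API:

#include <Eigen/Core>
#include <vector>

int main() {
  // Vector4f wants 16-byte (or wider, under AVX/AVX512) aligned storage,
  // which std::allocator does not guarantee pre-C++17; aligned_allocator does.
  std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> >
      pts(100, Eigen::Vector4f::Zero());
  pts[0] = Eigen::Vector4f(1.f, 2.f, 3.f, 4.f);
  return pts.size() == 100 ? 0 : 1;
}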
@@ -109,6 +109,28 @@ template<> struct is_integral<unsigned int> { enum { value = true }; };
template<> struct is_integral<signed long> { enum { value = true }; };
template<> struct is_integral<unsigned long> { enum { value = true }; };

+#if EIGEN_HAS_CXX11
+using std::make_unsigned;
+#else
+// TODO: Possibly improve this implementation of make_unsigned.
+// It is currently used only by
+// template<typename Scalar> struct random_default_impl<Scalar, false, true>.
+template<typename> struct make_unsigned;
+template<> struct make_unsigned<char> { typedef unsigned char type; };
+template<> struct make_unsigned<signed char> { typedef unsigned char type; };
+template<> struct make_unsigned<unsigned char> { typedef unsigned char type; };
+template<> struct make_unsigned<signed short> { typedef unsigned short type; };
+template<> struct make_unsigned<unsigned short> { typedef unsigned short type; };
+template<> struct make_unsigned<signed int> { typedef unsigned int type; };
+template<> struct make_unsigned<unsigned int> { typedef unsigned int type; };
+template<> struct make_unsigned<signed long> { typedef unsigned long type; };
+template<> struct make_unsigned<unsigned long> { typedef unsigned long type; };
+#if EIGEN_COMP_MSVC
+template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; };
+template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
+#endif
+#endif

template <typename T> struct add_const { typedef const T type; };
template <typename T> struct add_const<T&> { typedef T& type; };
@@ -485,6 +507,26 @@ T div_ceil(const T &a, const T &b)
  return (a+b-1) / b;
}

+// The aim of the following functions is to bypass -Wfloat-equal warnings
+// when we really want a strict equality comparison on floating points.
+template<typename X, typename Y> EIGEN_STRONG_INLINE
+bool equal_strict(const X& x,const Y& y) { return x == y; }
+
+template<> EIGEN_STRONG_INLINE
+bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE
+bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+
+template<typename X, typename Y> EIGEN_STRONG_INLINE
+bool not_equal_strict(const X& x,const Y& y) { return x != y; }
+
+template<> EIGEN_STRONG_INLINE
+bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE
+bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+
} // end namespace numext

} // end namespace Eigen
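The detour through std::equal_to works because -Wfloat-equal fires on a literal == or != with floating operands but not on the comparison buried inside the standard function object, while the semantics stay bit-identical. A minimal sketch:

#include <functional>

// With -Wfloat-equal, the first function warns; the second compiles silently,
// yet both perform the same strict IEEE comparison.
bool with_warning(float a, float b) { return a == b; }
bool without_warning(float a, float b) { return std::equal_to<float>()(a, b); }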
@@ -8,7 +8,7 @@
  #pragma warning pop
#elif defined __clang__
  #pragma clang diagnostic pop
-#elif defined __GNUC__ && __GNUC__>=6
+#elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
  #pragma GCC diagnostic pop
#endif
@@ -24,6 +24,7 @@
 *
 */

+#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT

  #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
@@ -44,64 +45,65 @@
    struct static_assertion<true>
    {
      enum {
-        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
-        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
-        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
-        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
-        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
-        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
-        OUT_OF_RANGE_ACCESS,
-        YOU_MADE_A_PROGRAMMING_MISTAKE,
-        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
-        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
-        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
-        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
-        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
-        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
-        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
-        NUMERIC_TYPE_MUST_BE_REAL,
-        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
-        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
-        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
-        INVALID_MATRIX_PRODUCT,
-        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
-        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
-        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
-        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
-        INVALID_MATRIX_TEMPLATE_PARAMETERS,
-        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
-        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
-        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
-        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
-        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
-        YOU_ALREADY_SPECIFIED_THIS_STRIDE,
-        INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
-        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
-        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
-        THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
-        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
-        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
-        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
-        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
-        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
-        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
-        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
-        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
-        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
-        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
-        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
-        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
-        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
-        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
-        STORAGE_LAYOUT_DOES_NOT_MATCH,
-        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
-        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
-        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
-        THIS_TYPE_IS_NOT_SUPPORTED,
-        STORAGE_KIND_MUST_MATCH,
-        STORAGE_INDEX_MUST_MATCH,
-        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
+        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
+        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
+        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
+        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
+        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
+        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
+        OUT_OF_RANGE_ACCESS=1,
+        YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
+        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
+        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
+        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
+        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
+        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
+        NUMERIC_TYPE_MUST_BE_REAL=1,
+        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
+        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
+        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
+        INVALID_MATRIX_PRODUCT=1,
+        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
+        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
+        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
+        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
+        INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
+        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
+        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
+        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
+        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
+        YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
+        INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
+        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
+        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
+        THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
+        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
+        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
+        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
+        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
+        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
+        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
+        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
+        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
+        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
+        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
+        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
+        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
+        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
+        STORAGE_LAYOUT_DOES_NOT_MATCH=1,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
+        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
+        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
+        THIS_TYPE_IS_NOT_SUPPORTED=1,
+        STORAGE_KIND_MUST_MATCH=1,
+        STORAGE_INDEX_MUST_MATCH=1,
+        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
+        SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
      };
    };
@@ -131,7 +133,7 @@
  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);

  #endif // EIGEN_NO_STATIC_ASSERT
+#endif // EIGEN_STATIC_ASSERT

// static assertion failing if the type \a TYPE is not a vector type
#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
@@ -311,7 +311,6 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
    // Aliases:
    Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
    ComplexVectorType &cv = m_tmp;
-    const MatrixType &mZ = m_realQZ.matrixZ();
    const MatrixType &mS = m_realQZ.matrixS();
    const MatrixType &mT = m_realQZ.matrixT();
@@ -351,7 +350,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
        }
      }
    }
-    m_eivec.col(i).real().noalias() = mZ.transpose() * v;
+    m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;
    m_eivec.col(i).real().normalize();
    m_eivec.col(i).imag().setConstant(0);
  }
@@ -400,7 +399,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
            / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
        }
      }
-      m_eivec.col(i+1).noalias() = (mZ.transpose() * cv);
+      m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);
      m_eivec.col(i+1).normalize();
      m_eivec.col(i) = m_eivec.col(i+1).conjugate();
    }
@@ -66,7 +66,6 @@ template<typename Derived>
inline typename MatrixBase<Derived>::EigenvaluesReturnType
MatrixBase<Derived>::eigenvalues() const
{
-  typedef typename internal::traits<Derived>::Scalar Scalar;
  return internal::eigenvalues_selector<Derived, NumTraits<Scalar>::IsComplex>::run(derived());
}
@@ -88,7 +87,6 @@ template<typename MatrixType, unsigned int UpLo>
inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType
SelfAdjointView<MatrixType, UpLo>::eigenvalues() const
{
-  typedef typename SelfAdjointView<MatrixType, UpLo>::PlainObject PlainObject;
  PlainObject thisAsMatrix(*this);
  return SelfAdjointEigenSolver<PlainObject>(thisAsMatrix, false).eigenvalues();
}
@@ -303,7 +303,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
  Scalar exshift(0); // sum of exceptional shifts
  Scalar norm = computeNormOfT();

-  if(norm!=0)
+  if(norm!=Scalar(0))
  {
    while (iu >= 0)
    {
@@ -327,7 +327,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
      else // No convergence yet
      {
        // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )
-        Vector3s firstHouseholderVector(0,0,0), shiftInfo;
+        Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;
        computeShift(iu, iter, exshift, shiftInfo);
        iter = iter + 1;
        totalIter = totalIter + 1;
@@ -37,7 +37,7 @@ namespace Eigen {

/** \internal Specialization for the data types supported by LAPACKe */

-#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \
+#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \
template<> template<typename InputType> inline \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
@@ -47,7 +47,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
  && (options&EigVecMask)!=EigVecMask \
  && "invalid option parameter"); \
  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \
-  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, matrix_order, info; \
+  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \
  m_eivalues.resize(n,1); \
  m_subdiag.resize(n-1); \
  m_eivec = matrix; \
@@ -63,27 +63,24 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
  } \
  \
  lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \
-  matrix_order=LAPACKE_COLROW; \
  char jobz, uplo='L'/*, range='A'*/; \
  jobz = computeEigenvectors ? 'V' : 'N'; \
  \
-  info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
+  info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
  m_info = (info==0) ? Success : NoConvergence; \
  m_isInitialized = true; \
  m_eigenvectorsOk = computeEigenvectors; \
  return *this; \
}

+#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \
+  EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \
+  EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor )

-EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR)
-
-EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev)
+EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev)
+EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)
+EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev)

} // end namespace Eigen
@@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const Quaterni
   if (n != Scalar(0))
   {
     m_angle = Scalar(2)*atan2(n, abs(q.w()));
-    if(q.w() < 0)
+    if(q.w() < Scalar(0))
       n = -n;
     m_axis  = q.vec() / n;
   }
@@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase<Derived, 3>
   typedef typename internal::traits<Derived>::Scalar Scalar;
   typedef typename NumTraits<Scalar>::Real RealScalar;
   typedef typename internal::traits<Derived>::Coefficients Coefficients;
+  typedef typename Coefficients::CoeffReturnType CoeffReturnType;
+  typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
+                                         Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;
+

   enum {
     Flags = Eigen::internal::traits<Derived>::Flags
   };

@@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase<Derived, 3>


   /** \returns the \c x coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
   /** \returns the \c y coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
   /** \returns the \c z coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
   /** \returns the \c w coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }

-  /** \returns a reference to the \c x coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
-  /** \returns a reference to the \c y coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
-  /** \returns a reference to the \c z coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
-  /** \returns a reference to the \c w coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
+  /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
+  /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
+  /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
+  /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }

   /** \returns a read-only vector expression of the imaginary part (x,y,z) */
   EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
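Note: the new CoeffReturnType/NonConstCoeffReturnType pair is what lets read-only quaternion expressions compile. When the underlying coefficients are not lvalues (a const Map, for instance), the accessors fall back to a by-value return instead of forming an invalid Scalar&. A minimal sketch of the resulting behaviour, assuming only the standard Map<Quaternion> API:

    #include <Eigen/Geometry>
    #include <iostream>

    int main()
    {
      const float buf[4] = {0.f, 0.f, 0.f, 1.f};   // x, y, z, w storage order

      // Read-only view over existing memory: coefficients are not lvalues,
      // so x()/y()/z()/w() must return by value (CoeffReturnType).
      Eigen::Map<const Eigen::Quaternionf> q(buf);
      std::cout << q.w() << "\n";                   // prints 1

      // Writable quaternion: the same accessors now yield Scalar&.
      Eigen::Quaternionf p = Eigen::Quaternionf::Identity();
      p.x() = 0.5f;                                 // NonConstCoeffReturnType == Scalar&
      std::cout << p.x() << "\n";
    }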
@@ -168,7 +168,7 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
     {
       for(Index j=0; j<mat.outerSize(); ++j)
       {
-        RealScalar sum = mat.innerVector(j).squaredNorm();
+        RealScalar sum = mat.col(j).squaredNorm();
         if(sum>RealScalar(0))
          m_invdiag(j) = RealScalar(1)/sum;
         else
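Note: for a least-squares problem the natural diagonal preconditioner is the diagonal of the normal matrix A^H A, and its j-th entry is exactly the squared norm of column j of A; spelling the loop with col(j) makes that intent explicit. A small dense check of the identity (hypothetical 3x2 matrix, standard Eigen API):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::MatrixXd A(3, 2);
      A << 1, 2,
           3, 4,
           5, 6;

      // diag(A^T A) equals the vector of squared column norms, which is the
      // quantity LeastSquareDiagonalPreconditioner inverts.
      Eigen::VectorXd d1 = (A.transpose() * A).diagonal();
      Eigen::VectorXd d2 = A.colwise().squaredNorm().transpose();
      std::cout << (d1 - d2).norm() << "\n";  // prints 0
    }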
@@ -50,7 +50,8 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
     tol_error = 0;
     return;
   }
-  RealScalar threshold = tol*tol*rhsNorm2;
+  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+  RealScalar threshold = numext::maxi(tol*tol*rhsNorm2,considerAsZero);
   RealScalar residualNorm2 = residual.squaredNorm();
   if (residualNorm2 < threshold)
   {
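Note: clamping the stopping threshold from below matters when rhs is tiny: tol*tol*rhsNorm2 can underflow to zero, and a threshold of exactly zero can keep the iteration running forever on rounding noise. A standalone sketch of the same guard (hypothetical numbers):

    #include <algorithm>
    #include <iostream>
    #include <limits>

    int main()
    {
      const double tol = 1e-10;
      const double rhsNorm2 = 1e-300;          // legitimate but tiny right-hand side

      double naive = tol * tol * rhsNorm2;     // ~1e-320: denormal, effectively 0
      double clamped = std::max(naive, std::numeric_limits<double>::min());

      std::cout << naive << " vs " << clamped << "\n";
      // With 'naive' as threshold, residualNorm2 < threshold is almost never true;
      // the clamped version still terminates once the residual underflows.
    }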
@@ -65,11 +65,11 @@ template<typename Scalar> class JacobiRotation
     bool makeJacobi(const MatrixBase<Derived>&, Index p, Index q);
     bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);

-    void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0);
+    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0);

   protected:
-    void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::true_type);
-    void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::false_type);
+    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type);
+    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type);

     Scalar m_c, m_s;
 };

@@ -84,7 +84,6 @@ bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, co
 {
   using std::sqrt;
   using std::abs;
-  typedef typename NumTraits<Scalar>::Real RealScalar;
   RealScalar deno = RealScalar(2)*abs(y);
   if(deno < (std::numeric_limits<RealScalar>::min)())
   {

@@ -133,7 +132,7 @@ inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Ind
 * \f$ V = \left ( \begin{array}{c} p \\ q \end{array} \right )\f$ yields:
 * \f$ G^* V = \left ( \begin{array}{c} r \\ 0 \end{array} \right )\f$.
 *
-* The value of \a z is returned if \a z is not null (the default is null).
+* The value of \a r is returned if \a r is not null (the default is null).
 * Also note that G is built such that the cosine is always real.
 *
 * Example: \include Jacobi_makeGivens.cpp

@@ -146,9 +145,9 @@ inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Ind
 * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
 */
 template<typename Scalar>
-void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* z)
+void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r)
 {
-  makeGivens(p, q, z, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type());
+  makeGivens(p, q, r, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type());
 }
@@ -298,16 +297,144 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
 }

 namespace internal {

+template<typename Scalar, typename OtherScalar,
+         int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
+struct apply_rotation_in_the_plane_selector
+{
+  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
+  {
+    for(Index i=0; i<size; ++i)
+    {
+      Scalar xi = *x;
+      Scalar yi = *y;
+      *x =  c * xi + numext::conj(s) * yi;
+      *y = -s * xi + numext::conj(c) * yi;
+      x += incrx;
+      y += incry;
+    }
+  }
+};
+
+template<typename Scalar, typename OtherScalar,
+         int SizeAtCompileTime, int MinAlignment>
+struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
+{
+  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
+  {
+    enum {
+      PacketSize = packet_traits<Scalar>::size,
+      OtherPacketSize = packet_traits<OtherScalar>::size
+    };
+    typedef typename packet_traits<Scalar>::type Packet;
+    typedef typename packet_traits<OtherScalar>::type OtherPacket;
+
+    /*** dynamic-size vectorized paths ***/
+    if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
+    {
+      // both vectors are sequentially stored in memory => vectorization
+      enum { Peeling = 2 };
+
+      Index alignedStart = internal::first_default_aligned(y, size);
+      Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
+
+      const OtherPacket pc = pset1<OtherPacket>(c);
+      const OtherPacket ps = pset1<OtherPacket>(s);
+      conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
+      conj_helper<OtherPacket,Packet,false,false> pm;
+
+      for(Index i=0; i<alignedStart; ++i)
+      {
+        Scalar xi = x[i];
+        Scalar yi = y[i];
+        x[i] =  c * xi + numext::conj(s) * yi;
+        y[i] = -s * xi + numext::conj(c) * yi;
+      }
+
+      Scalar* EIGEN_RESTRICT px = x + alignedStart;
+      Scalar* EIGEN_RESTRICT py = y + alignedStart;
+
+      if(internal::first_default_aligned(x, size)==alignedStart)
+      {
+        for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
+        {
+          Packet xi = pload<Packet>(px);
+          Packet yi = pload<Packet>(py);
+          pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+          px += PacketSize;
+          py += PacketSize;
+        }
+      }
+      else
+      {
+        Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
+        for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
+        {
+          Packet xi   = ploadu<Packet>(px);
+          Packet xi1  = ploadu<Packet>(px+PacketSize);
+          Packet yi   = pload <Packet>(py);
+          Packet yi1  = pload <Packet>(py+PacketSize);
+          pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
+          pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+          pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
+          px += Peeling*PacketSize;
+          py += Peeling*PacketSize;
+        }
+        if(alignedEnd!=peelingEnd)
+        {
+          Packet xi = ploadu<Packet>(x+peelingEnd);
+          Packet yi = pload <Packet>(y+peelingEnd);
+          pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+        }
+      }
+
+      for(Index i=alignedEnd; i<size; ++i)
+      {
+        Scalar xi = x[i];
+        Scalar yi = y[i];
+        x[i] =  c * xi + numext::conj(s) * yi;
+        y[i] = -s * xi + numext::conj(c) * yi;
+      }
+    }
+
+    /*** fixed-size vectorized path ***/
+    else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
+    {
+      const OtherPacket pc = pset1<OtherPacket>(c);
+      const OtherPacket ps = pset1<OtherPacket>(s);
+      conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
+      conj_helper<OtherPacket,Packet,false,false> pm;
+      Scalar* EIGEN_RESTRICT px = x;
+      Scalar* EIGEN_RESTRICT py = y;
+      for(Index i=0; i<size; i+=PacketSize)
+      {
+        Packet xi = pload<Packet>(px);
+        Packet yi = pload<Packet>(py);
+        pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+        pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+        px += PacketSize;
+        py += PacketSize;
+      }
+    }
+
+    /*** non-vectorized path ***/
+    else
+    {
+      apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
+    }
+  }
+};
+
 template<typename VectorX, typename VectorY, typename OtherScalar>
 void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
 {
   typedef typename VectorX::Scalar Scalar;
-  enum {
-    PacketSize = packet_traits<Scalar>::size,
-    OtherPacketSize = packet_traits<OtherScalar>::size
-  };
-  typedef typename packet_traits<Scalar>::type Packet;
-  typedef typename packet_traits<OtherScalar>::type OtherPacket;
+  const bool Vectorizable =    (VectorX::Flags & VectorY::Flags & PacketAccessBit)
+                            && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));

   eigen_assert(xpr_x.size() == xpr_y.size());
   Index size = xpr_x.size();
   Index incrx = xpr_x.derived().innerStride();
@@ -321,117 +448,11 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
   if (c==OtherScalar(1) && s==OtherScalar(0))
     return;

-  /*** dynamic-size vectorized paths ***/
-
-  if(VectorX::SizeAtCompileTime == Dynamic &&
-     (VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
-     (PacketSize == OtherPacketSize) &&
-     ((incrx==1 && incry==1) || PacketSize == 1))
-  {
-    // both vectors are sequentially stored in memory => vectorization
-    enum { Peeling = 2 };
-
-    Index alignedStart = internal::first_default_aligned(y, size);
-    Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
-
-    const OtherPacket pc = pset1<OtherPacket>(c);
-    const OtherPacket ps = pset1<OtherPacket>(s);
-    conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
-    conj_helper<OtherPacket,Packet,false,false> pm;
-
-    for(Index i=0; i<alignedStart; ++i)
-    {
-      Scalar xi = x[i];
-      Scalar yi = y[i];
-      x[i] =  c * xi + numext::conj(s) * yi;
-      y[i] = -s * xi + numext::conj(c) * yi;
-    }
-
-    Scalar* EIGEN_RESTRICT px = x + alignedStart;
-    Scalar* EIGEN_RESTRICT py = y + alignedStart;
-
-    if(internal::first_default_aligned(x, size)==alignedStart)
-    {
-      for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
-      {
-        Packet xi = pload<Packet>(px);
-        Packet yi = pload<Packet>(py);
-        pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-        pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
-        px += PacketSize;
-        py += PacketSize;
-      }
-    }
-    else
-    {
-      Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
-      for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
-      {
-        Packet xi   = ploadu<Packet>(px);
-        Packet xi1  = ploadu<Packet>(px+PacketSize);
-        Packet yi   = pload <Packet>(py);
-        Packet yi1  = pload <Packet>(py+PacketSize);
-        pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-        pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
-        pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
-        pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
-        px += Peeling*PacketSize;
-        py += Peeling*PacketSize;
-      }
-      if(alignedEnd!=peelingEnd)
-      {
-        Packet xi = ploadu<Packet>(x+peelingEnd);
-        Packet yi = pload <Packet>(y+peelingEnd);
-        pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-        pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
-      }
-    }
-
-    for(Index i=alignedEnd; i<size; ++i)
-    {
-      Scalar xi = x[i];
-      Scalar yi = y[i];
-      x[i] =  c * xi + numext::conj(s) * yi;
-      y[i] = -s * xi + numext::conj(c) * yi;
-    }
-  }
-
-  /*** fixed-size vectorized path ***/
-  else if(VectorX::SizeAtCompileTime != Dynamic &&
-          (VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
-          (PacketSize == OtherPacketSize) &&
-          (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
-  {
-    const OtherPacket pc = pset1<OtherPacket>(c);
-    const OtherPacket ps = pset1<OtherPacket>(s);
-    conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
-    conj_helper<OtherPacket,Packet,false,false> pm;
-    Scalar* EIGEN_RESTRICT px = x;
-    Scalar* EIGEN_RESTRICT py = y;
-    for(Index i=0; i<size; i+=PacketSize)
-    {
-      Packet xi = pload<Packet>(px);
-      Packet yi = pload<Packet>(py);
-      pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-      pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
-      px += PacketSize;
-      py += PacketSize;
-    }
-  }
-
-  /*** non-vectorized path ***/
-  else
-  {
-    for(Index i=0; i<size; ++i)
-    {
-      Scalar xi = *x;
-      Scalar yi = *y;
-      *x =  c * xi + numext::conj(s) * yi;
-      *y = -s * xi + numext::conj(c) * yi;
-      x += incrx;
-      y += incry;
-    }
-  }
+  apply_rotation_in_the_plane_selector<
+    Scalar,OtherScalar,
+    VectorX::SizeAtCompileTime,
+    EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
+    Vectorizable>::run(x,incrx,y,incry,size,c,s);
 }

 } // end namespace internal
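Note: the refactoring moves the three execution paths out of the function and into a class template keyed on compile-time facts (static size, alignment, vectorizability). Function templates cannot be partially specialized in C++, so this kind of dispatch has to go through a struct. A toy illustration of the pattern, not Eigen code (names are hypothetical):

    #include <iostream>

    // Generic case and specialized case are distinct types; the bool
    // parameter picks one at instantiation time, with no runtime branch.
    template<typename T, bool UseFastPath>
    struct axpy_selector
    {
      static void run(T a, const T* x, T* y, int n)
      {
        for(int i = 0; i < n; ++i) y[i] += a * x[i];  // generic scalar loop
      }
    };

    template<typename T>
    struct axpy_selector<T, true>  // stand-in for a vectorized kernel
    {
      static void run(T a, const T* x, T* y, int n)
      {
        for(int i = 0; i < n; ++i) y[i] += a * x[i];  // same semantics
      }
    };

    template<typename T>
    void axpy(T a, const T* x, T* y, int n)
    {
      axpy_selector<T, sizeof(T) == 4>::run(a, x, y, n);  // compile-time choice
    }

    int main()
    {
      float x[3] = {1, 2, 3}, y[3] = {0, 0, 0};
      axpy(2.f, x, y, 3);
      std::cout << y[0] << " " << y[1] << " " << y[2] << "\n";  // 2 4 6
    }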
@@ -404,7 +404,7 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
     const RealScalar& absDeterminantThreshold
   ) const
 {
-  RealScalar determinant;
+  Scalar determinant;
   // i'd love to put some static assertions there, but SFINAE means that they have no effect...
   eigen_assert(rows() == cols());
   computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
@@ -64,28 +64,28 @@ namespace internal
     typedef typename _MatrixType::StorageIndex StorageIndex;
   };

-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
     if (nbrhs == 0) {x = NULL; nbrhs=1;}
     s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
   }

-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
     if (nbrhs == 0) {x = NULL; nbrhs=1;}
     d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
   }

-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
     if (nbrhs == 0) {x = NULL; nbrhs=1;}
     c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm);
   }

-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
   {
     if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
     if (nbrhs == 0) {x = NULL; nbrhs=1;}
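Note: adding inline here is an ODR fix. These overloads are defined, not merely declared, in a header, so every translation unit including the support header emits its own definition, and linking two such units fails with duplicate symbols unless the functions are inline. A single-file stand-in for the rule (hypothetical names; imagine the header included from two .cpp files):

    #include <iostream>

    // "header-defined" free function: without 'inline', two translation units
    // including it would each emit an out-of-line definition and the link
    // would fail; with 'inline', the ODR merges the identical definitions.
    inline double twice(double v) { return 2.0 * v; }

    int main() { std::cout << twice(21.0) << "\n"; }  // prints 42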
@@ -11,7 +11,7 @@
 // Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
 // Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
 // Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
-// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -696,7 +696,9 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar
   for(Index i=0; i<m; ++i)
   {
     Index j = perm(i);
-    res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu));
+    // The following expression could be rewritten to involve only a single division,
+    // but this would make the expression more sensitive to overflow.
+    res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
   }
   return res;
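Note: the rewritten summand computes the same value; only the intermediate magnitudes change. With z^2 / (a*b), either z^2 or a*b can overflow even when the final quotient is representable, whereas dividing twice keeps every intermediate near the result's scale. A standalone illustration (hypothetical values):

    #include <iostream>

    int main()
    {
      const double z = 1e200, a = 1e180, b = 1e180;

      double oneDiv  = (z * z) / (a * b);   // z*z and a*b both overflow: inf/inf = nan
      double twoDivs = (z / a) * (z / b);   // 1e20 * 1e20 = 1e40, perfectly finite

      std::cout << oneDiv << " vs " << twoDivs << "\n";  // nan vs 1e+40
    }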
@@ -708,9 +710,12 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
 {
   using std::abs;
   using std::swap;
+  using std::sqrt;

   Index n = col0.size();
   Index actual_n = n;
+  // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
+  // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
   while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;

   for (Index k = 0; k < n; ++k)

@@ -732,7 +737,9 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
       right = (diag(actual_n-1) + col0.matrix().norm());
     else
     {
-      // Skip deflated singular values
+      // Skip deflated singular values,
+      // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
+      // This should be equivalent to using perm[]
       Index l = k+1;
       while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
       right = diag(l);
@@ -818,15 +825,23 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
     RealScalar leftShifted, rightShifted;
     if (shift == left)
     {
-      leftShifted = (std::numeric_limits<RealScalar>::min)();
+      // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
+      // the factor 2 is to be more conservative
+      leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
+
+      // check that we did it right:
+      eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
       // I don't understand why the case k==0 would be special there:
       // if (k == 0) rightShifted = right - left; else
-      rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe
+      rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
     }
     else
     {
-      leftShifted = -(right - left) * RealScalar(0.6);
-      rightShifted = -(std::numeric_limits<RealScalar>::min)();
+      leftShifted = -(right - left) * RealScalar(0.51);
+      if(k+1<n)
+        rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
+      else
+        rightShifted = -(std::numeric_limits<RealScalar>::min)();
     }

     RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
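Note: the bound in the new comment falls out of requiring the secular-equation term at the trial shift to stay finite. In the worst case both denominators are of order mu, so the summand behaves like (z_k/mu)^2; a short derivation under that simplification:

    % worst case: both factors are about z_k/\mu, so the summand is (z_k/\mu)^2
    \left(\frac{z_k}{\mu}\right)^2 \le \mathrm{realmax}
    \quad\Longleftrightarrow\quad
    \mu \ge \frac{|z_k|}{\sqrt{\mathrm{realmax}}}

combined with mu >= realmin to stay clear of denormals. The code doubles the bound as a safety margin, and the new eigen_internal_assert re-checks the resulting product for finiteness.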
@@ -980,7 +995,7 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
   Index start = firstCol + shift;
   RealScalar c = m_computed(start, start);
   RealScalar s = m_computed(start+i, start);
-  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
+  RealScalar r = numext::hypot(c,s);
   if (r == Literal(0))
   {
     m_computed(start+i, start+i) = Literal(0);
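Note: hypot(c,s) computes the same sqrt(c^2 + s^2) but scales by the larger magnitude internally, so it neither overflows when c^2 or s^2 would nor flushes to zero when both are tiny. A sketch of the scaling idea (not Eigen's exact implementation):

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    double naive_hypot(double c, double s) { return std::sqrt(c*c + s*s); }

    double scaled_hypot(double c, double s)
    {
      double a = std::abs(c), b = std::abs(s);
      if (a < b) std::swap(a, b);
      if (a == 0.0) return 0.0;
      double t = b / a;                 // t <= 1, so t*t cannot overflow
      return a * std::sqrt(1.0 + t*t);
    }

    int main()
    {
      double c = 1e200, s = 1e200;      // c*c overflows to inf
      std::cout << naive_hypot(c, s)    // inf
                << " vs "
                << scaled_hypot(c, s)   // ~1.414e200
                << "\n";
    }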
@@ -61,9 +61,10 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
     u = (LAPACKE_TYPE*)m_matrixU.data(); \
   } else { ldu=1; u=&dummy; }\
   MatrixType localV; \
-  ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
+  lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
   if (computeV()) { \
-    localV.resize(ldvt, m_cols); \
+    localV.resize(vt_rows, m_cols); \
+    ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \
     vt = (LAPACKE_TYPE*)localV.data(); \
   } else { ldvt=1; vt=&dummy; }\
   Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
@@ -180,8 +180,10 @@ public:
     RealScalar threshold() const
     {
       eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+      // this temporary is needed to workaround a MSVC issue
+      Index diagSize = (std::max<Index>)(1,m_diagSize);
       return m_usePrescribedThreshold ? m_prescribedThreshold
-                                      : (std::max<Index>)(1,m_diagSize)*NumTraits<Scalar>::epsilon();
+                                      : diagSize*NumTraits<Scalar>::epsilon();
     }

     /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */
@@ -94,7 +94,7 @@ class AmbiVector
         Index allocSize = m_allocatedElements * sizeof(ListEl);
         allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);
         Scalar* newBuffer = new Scalar[allocSize];
-        memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
+        std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
         delete[] m_buffer;
         m_buffer = newBuffer;
       }
@@ -17,7 +17,9 @@ namespace internal {
 template<typename Lhs, typename Rhs, typename ResultType>
 static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
 {
-  typedef typename remove_all<Lhs>::type::Scalar Scalar;
+  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
+  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
+  typedef typename remove_all<ResultType>::type::Scalar ResScalar;

   // make sure to call innerSize/outerSize since we fake the storage order.
   Index rows = lhs.innerSize();

@@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
   eigen_assert(lhs.outerSize() == rhs.innerSize());

   ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
-  ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0);
+  ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
   ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);

   std::memset(mask,0,sizeof(bool)*rows);

@@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
     Index nnz = 0;
     for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
     {
-      Scalar y = rhsIt.value();
+      RhsScalar y = rhsIt.value();
       Index k = rhsIt.index();
       for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
       {
         Index i = lhsIt.index();
-        Scalar x = lhsIt.value();
+        LhsScalar x = lhsIt.value();
         if(!mask[i])
         {
           mask[i] = true;

@@ -166,11 +168,12 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
-    RowMajorMatrix rhsRow = rhs;
-    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
-    internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
-    res = resRow;
+    typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
+    RowMajorRhs rhsRow = rhs;
+    RowMajorRes resRow(lhs.rows(), rhs.cols());
+    internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
+    res = resRow;
   }
 };

@@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
-    RowMajorMatrix lhsRow = lhs;
-    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
-    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
+    typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
+    RowMajorLhs lhsRow = lhs;
+    RowMajorRes resRow(lhs.rows(), rhs.cols());
+    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
     res = resRow;
   }
 };

@@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
-    ColMajorMatrix lhsCol = lhs;
-    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
-    internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
+    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
+    ColMajorLhs lhsCol = lhs;
+    ColMajorRes resCol(lhs.rows(), rhs.cols());
+    internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
     res = resCol;
   }
 };

@@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
-    ColMajorMatrix rhsCol = rhs;
-    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
-    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
+    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
+    ColMajorRhs rhsCol = rhs;
+    ColMajorRes resCol(lhs.rows(), rhs.cols());
+    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
     res = resCol;
   }
 };

@@ -263,7 +269,8 @@ namespace internal {
 template<typename Lhs, typename Rhs, typename ResultType>
 static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
 {
-  typedef typename remove_all<Lhs>::type::Scalar Scalar;
+  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
+  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
   Index cols = rhs.outerSize();
   eigen_assert(lhs.outerSize() == rhs.innerSize());

@@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs,
   {
     for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
     {
-      Scalar y = rhsIt.value();
+      RhsScalar y = rhsIt.value();
       Index k = rhsIt.index();
       for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
       {
         Index i = lhsIt.index();
-        Scalar x = lhsIt.value();
+        LhsScalar x = lhsIt.value();
         res.coeffRef(i,j) += x * y;
       }
     }

@@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMa
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
-    ColMajorMatrix lhsCol(lhs);
-    internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res);
+    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
+    ColMajorLhs lhsCol(lhs);
+    internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
   }
 };

@@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMa
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
-    ColMajorMatrix rhsCol(rhs);
-    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res);
+    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
+    ColMajorRhs rhsCol(rhs);
+    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
   }
 };
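Note: splitting the single Scalar typedef into LhsScalar/RhsScalar/ResScalar is what lets the two operands of a sparse product carry different scalar types, with the result type doing the mixing. A hedged usage sketch; it relies on Eigen's scalar promotion (ScalarBinaryOpTraits) being defined for the chosen pair, as it is for double and std::complex<double>:

    #include <Eigen/Sparse>
    #include <complex>
    #include <iostream>

    int main()
    {
      typedef std::complex<double> cd;

      Eigen::SparseMatrix<double> A(2, 2);
      A.insert(0, 0) = 2.0;
      A.insert(1, 1) = 3.0;

      Eigen::SparseMatrix<cd> B(2, 2);
      B.insert(0, 0) = cd(1.0, 1.0);
      B.insert(1, 0) = cd(0.0, 2.0);

      A.makeCompressed();
      B.makeCompressed();

      // real * complex: the Lhs, Rhs and result scalar types all differ
      Eigen::SparseMatrix<cd> C = A * B;
      std::cout << C.coeff(0, 0) << "\n";  // (2,2)
    }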
@@ -893,7 +893,7 @@ public:

       Index p = m_outerIndex[outer] + m_innerNonZeros[outer]++;
       m_data.index(p) = convert_index(inner);
-      return (m_data.value(p) = 0);
+      return (m_data.value(p) = Scalar(0));
     }

 private:

@@ -1274,7 +1274,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar&
     m_innerNonZeros[outer]++;

     m_data.index(p) = inner;
-    return (m_data.value(p) = 0);
+    return (m_data.value(p) = Scalar(0));
   }

 template<typename _Scalar, int _Options, typename _StorageIndex>

@@ -1381,7 +1381,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar&
   }

   m_data.index(p) = inner;
-  return (m_data.value(p) = 0);
+  return (m_data.value(p) = Scalar(0));
 }

 namespace internal {
@@ -311,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
         while (i && i.index()<j) ++i;
         if(i && i.index()==j)
         {
-          res(j,k) += alpha * i.value() * rhs(j,k);
+          res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
           ++i;
         }
       }

@@ -324,14 +324,14 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
       {
         LhsScalar lhs_ij = i.value();
         if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij);
-        res_j += lhs_ij * rhs(i.index(),k);
+        res_j += lhs_ij * rhs.coeff(i.index(),k);
         res(i.index(),k) += numext::conj(lhs_ij) * rhs_j;
       }
-      res(j,k) += alpha * res_j;
+      res.coeffRef(j,k) += alpha * res_j;

       // handle diagonal coeff
       if (ProcessFirstHalf && i && (i.index()==j))
-        res(j,k) += alpha * i.value() * rhs(j,k);
+        res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
     }
   }
 }
@@ -21,7 +21,8 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
 {
   // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res);

-  typedef typename remove_all<Lhs>::type::Scalar Scalar;
+  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
+  typedef typename remove_all<ResultType>::type::Scalar ResScalar;
   typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex;

   // make sure to call innerSize/outerSize since we fake the storage order.

@@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
   eigen_assert(lhs.outerSize() == rhs.innerSize());

   // allocate a temporary buffer
-  AmbiVector<Scalar,StorageIndex> tempVector(rows);
+  AmbiVector<ResScalar,StorageIndex> tempVector(rows);

   // mimics a resizeByInnerOuter:
   if(ResultType::IsRowMajor)

@@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
     {
       // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
       tempVector.restart();
-      Scalar x = rhsIt.value();
+      RhsScalar x = rhsIt.value();
       for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
       {
         tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
       }
     }
     res.startVec(j);
-    for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
+    for (typename AmbiVector<ResScalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
       res.insertBackByOuterInner(j,it.index()) = it.value();
   }
   res.finalize();

@@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector;
 template<typename Lhs, typename Rhs, typename ResultType>
 struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
 {
-  typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;
   typedef typename ResultType::RealScalar RealScalar;

   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)

@@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
   typedef typename ResultType::RealScalar RealScalar;
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
+    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
+    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
     ColMajorMatrixLhs colLhs(lhs);
     ColMajorMatrixRhs colRhs(rhs);
     internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);

@@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
   typedef typename ResultType::RealScalar RealScalar;
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;
+    typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;
     RowMajorMatrixLhs rowLhs(lhs);
     sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance);
   }

@@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
   typedef typename ResultType::RealScalar RealScalar;
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;
+    typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;
     RowMajorMatrixRhs rowRhs(rhs);
     sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance);
   }

@@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
   typedef typename ResultType::RealScalar RealScalar;
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
+    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
     ColMajorMatrixRhs colRhs(rhs);
     internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);
   }

@@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
   typedef typename ResultType::RealScalar RealScalar;
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
+    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
     ColMajorMatrixLhs colLhs(lhs);
     internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);
   }
@@ -499,8 +499,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
   eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
   eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");

-  typedef typename IndexVector::Scalar StorageIndex;
-
   m_isInitialized = true;

@@ -52,7 +52,7 @@ namespace internal {
   * rank-revealing permutations. Use colsPermutation() to get it.
   *
   * Q is the orthogonal matrix represented as products of Householder reflectors.
-  * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose.
+  * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint.
   * You can then apply it to a vector.
   *
   * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient.

@@ -65,6 +65,7 @@ namespace internal {
   * \implsparsesolverconcept
   *
   * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).
+  * \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix.
   *
   */
 template<typename _MatrixType, typename _OrderingType>

@@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >

       Index rank = this->rank();

-      // Compute Q^T * b;
+      // Compute Q^* * b;
       typename Dest::PlainObject y, b;
-      y = this->matrixQ().transpose() * B;
+      y = this->matrixQ().adjoint() * B;
       b = y;

       // Solve with the triangular matrix R

@@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
   // Get the references
   SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
   m_qr(qr),m_other(other),m_transpose(transpose) {}
-  inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); }
+  inline Index rows() const { return m_qr.matrixQ().rows(); }
   inline Index cols() const { return m_other.cols(); }

   // Assign to a vector

@@ -632,7 +633,10 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
     }
     else
     {
-      eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes");
+      eigen_assert(m_qr.matrixQ().cols() == m_other.rows() && "Non conforming object sizes");
+
+      res.conservativeResize(rows(), cols());
+
       // Compute res = Q * other column by column
       for(Index j = 0; j < res.cols(); j++)
       {

@@ -641,7 +645,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
         Scalar tau = Scalar(0);
         tau = m_qr.m_Q.col(k).dot(res.col(j));
         if(tau==Scalar(0)) continue;
-        tau = tau * m_qr.m_hcoeffs(k);
+        tau = tau * numext::conj(m_qr.m_hcoeffs(k));
         res.col(j) -= tau * m_qr.m_Q.col(k);
       }
     }

@@ -650,7 +654,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived

   const SparseQRType& m_qr;
   const Derived& m_other;
-  bool m_transpose;
+  bool m_transpose; // TODO this actually means adjoint
 };

 template<typename SparseQRType>

@@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
   {
     return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false);
   }
+  // To use for operations with the adjoint of Q
+  SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const
+  {
+    return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
+  }
   inline Index rows() const { return m_qr.rows(); }
-  inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); }
-  // To use for operations with the transpose of Q
+  inline Index cols() const { return m_qr.rows(); }
+  // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment
   SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const
   {
     return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);

@@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
   const SparseQRType& m_qr;
 };

+// TODO this actually represents the adjoint of Q
 template<typename SparseQRType>
 struct SparseQRMatrixQTransposeReturnType
 {

@@ -712,7 +718,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
   typedef typename DstXprType::StorageIndex StorageIndex;
   static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
   {
     typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
-    typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
+    typename DstXprType::PlainObject idMat(src.rows(), src.cols());
     idMat.setIdentity();
     // Sort the sparse householder reflectors if needed
     const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();
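Note: for complex matrices the inverse of Q is its adjoint Q^H, not its transpose, hence the documentation change and the numext::conj on the Householder coefficient: applying (I - tau v v^H) adjointly means using the conjugated tau. A hedged least-squares usage sketch over the standard SparseQR API:

    #include <Eigen/SparseQR>
    #include <complex>
    #include <iostream>

    int main()
    {
      typedef std::complex<double> cd;
      typedef Eigen::SparseMatrix<cd> SpMat;

      SpMat A(3, 2);
      A.insert(0, 0) = cd(1, 1);
      A.insert(1, 1) = cd(2, 0);
      A.insert(2, 0) = cd(0, 3);
      A.makeCompressed();  // SparseQR requires compressed storage

      Eigen::SparseQR<SpMat, Eigen::COLAMDOrdering<int> > qr(A);

      // Internally the solve path computes y = Q^* b (adjoint, not plain
      // transpose), then back-substitutes with R.
      Eigen::Matrix<cd, 3, 1> b;
      b << cd(1, 0), cd(0, 1), cd(2, -1);
      Eigen::Matrix<cd, 2, 1> x = qr.solve(b);

      std::cout << (A * x - b).norm() << "\n";  // least-squares residual
    }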
@@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat)
 template<typename Scalar, int Flags, typename Index>
 MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
 {
-  eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR
-         || (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC);
+  eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
+         || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));

   Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;
@@ -60,7 +60,7 @@ if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS AND BLAS_FOUND)
   endif(SCOTCH_FOUND)
   set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES})
   set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP})
-endif(PASTIX_FOUND AND BLAS_FOUND)
+endif()

 if(METIS_FOUND)
   include_directories(${METIS_INCLUDE_DIRS})
@@ -45,10 +45,12 @@ install(TARGETS eigen_blas eigen_blas_static

 if(EIGEN_Fortran_COMPILER_WORKS)

-if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-  add_subdirectory(testing) # can't do EXCLUDE_FROM_ALL here, breaks CTest
-else()
-  add_subdirectory(testing EXCLUDE_FROM_ALL)
+if(BUILD_TESTING)
+  if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+    add_subdirectory(testing) # can't do EXCLUDE_FROM_ALL here, breaks CTest
+  else()
+    add_subdirectory(testing EXCLUDE_FROM_ALL)
+  endif()
 endif()

 endif()
@@ -11,13 +11,15 @@ add_custom_target(buildtests)
 add_custom_target(check COMMAND "ctest")
 add_dependencies(check buildtests)

-# check whether /bin/bash exists
-find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH)
+# check whether /bin/bash exists (disabled as not used anymore)
+# find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH)

 # This call activates testing and generates the DartConfiguration.tcl
 include(CTest)

 set(EIGEN_TEST_BUILD_FLAGS "" CACHE STRING "Options passed to the build command of unit tests")
+set(EIGEN_DASHBOARD_BUILD_TARGET "buildtests" CACHE STRING "Target to be built in dashboard mode, default is buildtests")
+set(EIGEN_CTEST_ERROR_EXCEPTION "" CACHE STRING "Regular expression for build error messages to be filtered out")

 # Overwrite default DartConfiguration.tcl such that ctest can build our unit tests.
 # Recall that our unit tests are not in the "all" target, so we have to explicitely ask ctest to build our custom 'buildtests' target.

@@ -28,7 +30,7 @@ string(REGEX MATCH "MakeCommand:.*-- (.*)\nDefaultCTestConfigurationType" EIGEN_
 if(NOT CMAKE_MATCH_1)
 string(REGEX MATCH "MakeCommand:.*[^c]make (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE})
 endif()
-string(REGEX REPLACE "MakeCommand:.*DefaultCTestConfigurationType" "MakeCommand: ${CMAKE_COMMAND} --build . --target buildtests --config \"\${CTEST_CONFIGURATION_TYPE}\" -- ${CMAKE_MATCH_1} ${EIGEN_TEST_BUILD_FLAGS}\nDefaultCTestConfigurationType"
+string(REGEX REPLACE "MakeCommand:.*DefaultCTestConfigurationType" "MakeCommand: ${CMAKE_COMMAND} --build . --target ${EIGEN_DASHBOARD_BUILD_TARGET} --config \"\${CTEST_CONFIGURATION_TYPE}\" -- ${CMAKE_MATCH_1} ${EIGEN_TEST_BUILD_FLAGS}\nDefaultCTestConfigurationType"
 EIGEN_DART_CONFIG_FILE2 ${EIGEN_DART_CONFIG_FILE})
 file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" ${EIGEN_DART_CONFIG_FILE2})

@@ -54,8 +56,3 @@ elseif(MSVC)
 endif(CMAKE_COMPILER_IS_GNUCXX)


-check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CXX11)
-
-if(EIGEN_TEST_CXX11 AND EIGEN_COMPILER_SUPPORT_CXX11)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-endif()
@@ -152,7 +152,7 @@ set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})

 # Check the language being used
 get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES )
-if( _LANGUAGES_ MATCHES Fortran )
+if( _LANGUAGES_ MATCHES Fortran AND CMAKE_Fortran_COMPILER)
   set( _CHECK_FORTRAN TRUE )
 elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) )
   set( _CHECK_FORTRAN FALSE )
@@ -26,7 +26,7 @@ function(workaround_9220 language language_works)
     cmake_minimum_required(VERSION 2.8.0)
     set (CMAKE_Fortran_FLAGS \"${CMAKE_Fortran_FLAGS}\")
     set (CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\")
-    enable_language(${language} OPTIONAL)
+    enable_language(${language})
 ")
   file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/language_tests/${language})
   file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/language_tests/${language})
@@ -34,8 +34,8 @@ set(EIGEN_DOXY_PROJECT_NAME "Eigen-unsupported")
 set(EIGEN_DOXY_OUTPUT_DIRECTORY_SUFFIX "/unsupported")
 set(EIGEN_DOXY_INPUT "\"${Eigen_SOURCE_DIR}/unsupported/Eigen\" \"${Eigen_SOURCE_DIR}/unsupported/doc\"")
 set(EIGEN_DOXY_HTML_COLORSTYLE_HUE "0")
-# set(EIGEN_DOXY_TAGFILES "\"${Eigen_BINARY_DIR}/doc/eigen.doxytags =../\"")
-set(EIGEN_DOXY_TAGFILES "")
+set(EIGEN_DOXY_TAGFILES "\"${Eigen_BINARY_DIR}/doc/Eigen.doxytags=..\"")
+#set(EIGEN_DOXY_TAGFILES "")

 configure_file(
   ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
@@ -63,7 +63,7 @@ This also means that, unless specified, if the function \c std::foo is available
   \anchor cwisetable_conj
   a.\link ArrayBase::conjugate conjugate\endlink(); \n
   \link Eigen::conj conj\endlink(a); \n
-  m.\link MatrixBase::conjugate conjugate();
+  m.\link MatrixBase::conjugate conjugate\endlink();
   </td>
   <td><a href="https://en.wikipedia.org/wiki/Complex_conjugate">complex conjugate</a> (\f$ \bar{a_i} \f$),\n
   no-op for real </td>

@@ -133,8 +133,9 @@ This also means that, unless specified, if the function \c std::foo is available
   <td class="code">
   \anchor cwisetable_pow
   a.\link ArrayBase::pow pow\endlink(b); \n
-  \link Eigen::pow pow\endlink(a,b);
+  \link ArrayBase::pow(const Eigen::ArrayBase< Derived > &x, const Eigen::ArrayBase< ExponentDerived > &exponents) pow\endlink(a,b);
   </td>
+  <!-- For some reason Doxygen thinks that pow is in ArrayBase namespace -->
   <td>raises a number to the given power (\f$ a_i ^ {b_i} \f$) \n \c a and \c b can be either an array or scalar.</td>
   <td class="code">
   using <a href="http://en.cppreference.com/w/cpp/numeric/math/pow">std::pow</a>; \n

@@ -271,7 +272,7 @@ This also means that, unless specified, if the function \c std::foo is available
 <tr>
   <td class="code">
   \anchor cwisetable_atan
-  a.\link ArrayBase::atan tan\endlink(); \n
+  a.\link ArrayBase::atan atan\endlink(); \n
   \link Eigen::atan atan\endlink(a);
   </td>
   <td>computes arc tangent (\f$ \tan^{-1} a_i \f$)</td>
@@ -1596,6 +1596,7 @@ PREDEFINED = EIGEN_EMPTY_STRUCT \
                          "EIGEN_CAT2(a,b)= a ## b"\
                          "EIGEN_CAT(a,b)=EIGEN_CAT2(a,b)"\
                          "EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME)=CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<LHS::Scalar, RHS::Scalar>, const LHS, const RHS>"\
+                         "EIGEN_ALIGN_TO_BOUNDARY(x)="\
                          DOXCOMMA=,

@@ -1618,6 +1619,9 @@ EXPAND_AS_DEFINED = EIGEN_MAKE_TYPEDEFS \
                        EIGEN_EULER_ANGLES_TYPEDEFS \
                        EIGEN_EULER_ANGLES_SINGLE_TYPEDEF \
                        EIGEN_EULER_SYSTEM_TYPEDEF \
+                       EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY \
+                       EIGEN_MATRIX_FUNCTION \
+                       EIGEN_MATRIX_FUNCTION_1 \
                        EIGEN_DOC_UNARY_ADDONS \
                        EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL \
                        EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF

@@ -1665,7 +1669,7 @@ ALLEXTERNALS = NO
 # in the modules index. If set to NO, only the current project's groups will
 # be listed.

-EXTERNAL_GROUPS = YES
+EXTERNAL_GROUPS = NO

 # The PERL_PATH should be the absolute path and name of the perl script
 # interpreter (i.e. the result of `which perl').