author     Benoit Steiner <benoit.steiner.goog@gmail.com>  2015-02-06 05:25:03 -0800
committer  Benoit Steiner <benoit.steiner.goog@gmail.com>  2015-02-06 05:25:03 -0800
commit     c739102ef9a52fcb194dcc77f785aa55879987e4 (patch)
tree       22d19d1df4cb20baea532fa1ce13208329ff53e3
parent     2559fa9b0f20ea138cfb019d441ad1757221568d (diff)
parent     a8f2c6eec788c5cccc6beb9b5837544ea98a7154 (diff)
Pulled the latest changes from the trunk
-rw-r--r--  .hgignore | 2
-rw-r--r--  CMakeLists.txt | 21
-rw-r--r--  Eigen/Cholesky | 1
-rw-r--r--  Eigen/CholmodSupport | 4
-rw-r--r--  Eigen/Core | 56
-rw-r--r--  Eigen/IterativeLinearSolvers | 4
-rw-r--r--  Eigen/LU | 3
-rw-r--r--  Eigen/PaStiXSupport | 4
-rw-r--r--  Eigen/QR | 1
-rw-r--r--  Eigen/SPQRSupport | 2
-rw-r--r--  Eigen/SVD | 10
-rw-r--r--  Eigen/SparseCholesky | 2
-rw-r--r--  Eigen/SparseCore | 23
-rw-r--r--  Eigen/SparseLU | 3
-rw-r--r--  Eigen/SparseQR | 3
-rw-r--r--  Eigen/StdDeque | 2
-rw-r--r--  Eigen/StdList | 2
-rw-r--r--  Eigen/StdVector | 2
-rw-r--r--  Eigen/SuperLUSupport | 4
-rw-r--r--  Eigen/UmfPackSupport | 3
-rw-r--r--  Eigen/src/Cholesky/LDLT.h | 98
-rw-r--r--  Eigen/src/Cholesky/LLT.h | 44
-rw-r--r--  Eigen/src/CholmodSupport/CholmodSupport.h | 80
-rw-r--r--  Eigen/src/Core/Array.h | 25
-rw-r--r--  Eigen/src/Core/ArrayBase.h | 28
-rw-r--r--  Eigen/src/Core/ArrayWrapper.h | 6
-rw-r--r--  Eigen/src/Core/Assign.h | 558
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h | 464
-rw-r--r--  Eigen/src/Core/BandMatrix.h | 25
-rw-r--r--  Eigen/src/Core/Block.h | 34
-rw-r--r--  Eigen/src/Core/BooleanRedux.h | 32
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h | 878
-rw-r--r--  Eigen/src/Core/CoreIterators.h | 143
-rw-r--r--  Eigen/src/Core/CwiseBinaryOp.h | 112
-rw-r--r--  Eigen/src/Core/CwiseNullaryOp.h | 10
-rw-r--r--  Eigen/src/Core/CwiseUnaryOp.h | 54
-rw-r--r--  Eigen/src/Core/CwiseUnaryView.h | 44
-rw-r--r--  Eigen/src/Core/DenseBase.h | 54
-rw-r--r--  Eigen/src/Core/DenseCoeffsBase.h | 169
-rw-r--r--  Eigen/src/Core/DenseStorage.h | 16
-rw-r--r--  Eigen/src/Core/Diagonal.h | 39
-rw-r--r--  Eigen/src/Core/DiagonalMatrix.h | 88
-rw-r--r--  Eigen/src/Core/DiagonalProduct.h | 106
-rw-r--r--  Eigen/src/Core/Dot.h | 7
-rw-r--r--  Eigen/src/Core/EigenBase.h | 6
-rw-r--r--  Eigen/src/Core/Flagged.h | 30
-rw-r--r--  Eigen/src/Core/ForceAlignedAccess.h | 24
-rw-r--r--  Eigen/src/Core/Fuzzy.h | 7
-rw-r--r--  Eigen/src/Core/GeneralProduct.h | 422
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h | 8
-rw-r--r--  Eigen/src/Core/GlobalFunctions.h | 4
-rw-r--r--  Eigen/src/Core/Inverse.h | 129
-rw-r--r--  Eigen/src/Core/Map.h | 17
-rw-r--r--  Eigen/src/Core/MapBase.h | 19
-rw-r--r--  Eigen/src/Core/MathFunctions.h | 78
-rw-r--r--  Eigen/src/Core/Matrix.h | 14
-rw-r--r--  Eigen/src/Core/MatrixBase.h | 48
-rw-r--r--  Eigen/src/Core/NestByValue.h | 20
-rw-r--r--  Eigen/src/Core/NoAlias.h | 59
-rw-r--r--  Eigen/src/Core/PermutationMatrix.h | 133
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h | 90
-rw-r--r--  Eigen/src/Core/Product.h | 180
-rw-r--r--  Eigen/src/Core/ProductBase.h | 247
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h | 879
-rw-r--r--  Eigen/src/Core/Redux.h | 120
-rw-r--r--  Eigen/src/Core/Ref.h | 44
-rw-r--r--  Eigen/src/Core/Replicate.h | 6
-rw-r--r--  Eigen/src/Core/ReturnByValue.h | 36
-rw-r--r--  Eigen/src/Core/Reverse.h | 36
-rw-r--r--  Eigen/src/Core/Select.h | 22
-rw-r--r--  Eigen/src/Core/SelfAdjointView.h | 175
-rw-r--r--  Eigen/src/Core/SelfCwiseBinaryOp.h | 179
-rw-r--r--  Eigen/src/Core/Solve.h | 152
-rw-r--r--  Eigen/src/Core/SolveTriangular.h | 10
-rw-r--r--  Eigen/src/Core/StableNorm.h | 10
-rw-r--r--  Eigen/src/Core/Stride.h | 8
-rw-r--r--  Eigen/src/Core/Swap.h | 161
-rw-r--r--  Eigen/src/Core/Transpose.h | 122
-rw-r--r--  Eigen/src/Core/Transpositions.h | 6
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h | 883
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h | 112
-rw-r--r--  Eigen/src/Core/Visitor.h | 44
-rw-r--r--  Eigen/src/Core/arch/AVX/PacketMath.h | 10
-rw-r--r--  Eigen/src/Core/arch/AltiVec/Complex.h | 230
-rwxr-xr-x  Eigen/src/Core/arch/AltiVec/PacketMath.h | 433
-rw-r--r--  Eigen/src/Core/arch/NEON/Complex.h | 194
-rw-r--r--  Eigen/src/Core/arch/NEON/PacketMath.h | 233
-rw-r--r--  Eigen/src/Core/arch/SSE/MathFunctions.h | 6
-rwxr-xr-x  Eigen/src/Core/arch/SSE/PacketMath.h | 26
-rw-r--r--  Eigen/src/Core/functors/AssignmentFunctors.h | 33
-rw-r--r--  Eigen/src/Core/functors/BinaryFunctors.h | 6
-rw-r--r--  Eigen/src/Core/products/CoeffBasedProduct.h | 452
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h | 194
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h | 142
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 14
-rw-r--r--  Eigen/src/Core/products/Parallelizer.h | 4
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 55
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector.h | 81
-rw-r--r--  Eigen/src/Core/products/SelfadjointRank2Update.h | 8
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix.h | 43
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h | 130
-rw-r--r--  Eigen/src/Core/util/BlasUtil.h | 2
-rw-r--r--  Eigen/src/Core/util/Constants.h | 49
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h | 47
-rw-r--r--  Eigen/src/Core/util/Macros.h | 356
-rw-r--r--  Eigen/src/Core/util/Memory.h | 97
-rw-r--r--  Eigen/src/Core/util/Meta.h | 12
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h | 11
-rw-r--r--  Eigen/src/Core/util/XprHelper.h | 169
-rw-r--r--  Eigen/src/Eigenvalues/ComplexEigenSolver.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 1
-rw-r--r--  Eigen/src/Eigenvalues/EigenSolver.h | 10
-rw-r--r--  Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/HessenbergDecomposition.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/RealQZ.h | 6
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur.h | 16
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur_MKL.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 7
-rw-r--r--  Eigen/src/Eigenvalues/Tridiagonalization.h | 19
-rw-r--r--  Eigen/src/Geometry/AlignedBox.h | 2
-rw-r--r--  Eigen/src/Geometry/Homogeneous.h | 128
-rw-r--r--  Eigen/src/Geometry/Hyperplane.h | 12
-rw-r--r--  Eigen/src/Geometry/OrthoMethods.h | 23
-rw-r--r--  Eigen/src/Geometry/Quaternion.h | 11
-rw-r--r--  Eigen/src/Geometry/Rotation2D.h | 15
-rw-r--r--  Eigen/src/Geometry/Transform.h | 50
-rw-r--r--  Eigen/src/Householder/BlockHouseholder.h | 73
-rw-r--r--  Eigen/src/Householder/HouseholderSequence.h | 43
-rw-r--r--  Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h | 29
-rw-r--r--  Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 53
-rw-r--r--  Eigen/src/IterativeLinearSolvers/ConjugateGradient.h | 45
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IncompleteLUT.h | 42
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h | 77
-rw-r--r--  Eigen/src/IterativeLinearSolvers/SolveWithGuess.h | 113
-rw-r--r--  Eigen/src/LU/Determinant.h | 2
-rw-r--r--  Eigen/src/LU/FullPivLU.h | 135
-rw-r--r--  Eigen/src/LU/InverseImpl.h (renamed from Eigen/src/LU/Inverse.h) | 64
-rw-r--r--  Eigen/src/LU/PartialPivLU.h | 84
-rw-r--r--  Eigen/src/LU/arch/Inverse_SSE.h | 11
-rw-r--r--  Eigen/src/OrderingMethods/Amd.h | 5
-rw-r--r--  Eigen/src/PaStiXSupport/PaStiXSupport.h | 89
-rw-r--r--  Eigen/src/PardisoSupport/PardisoSupport.h | 98
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR.h | 117
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR_MKL.h | 1
-rw-r--r--  Eigen/src/QR/FullPivHouseholderQR.h | 111
-rw-r--r--  Eigen/src/QR/HouseholderQR.h | 65
-rw-r--r--  Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 106
-rw-r--r--  Eigen/src/SVD/BDCSVD.h | 1172
-rw-r--r--  Eigen/src/SVD/JacobiSVD.h | 80
-rw-r--r--  Eigen/src/SVD/JacobiSVD_MKL.h | 4
-rw-r--r--  Eigen/src/SVD/SVDBase.h | 41
-rw-r--r--  Eigen/src/SVD/UpperBidiagonalization.h | 8
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky.h | 208
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky_impl.h | 2
-rw-r--r--  Eigen/src/SparseCore/AmbiVector.h | 8
-rw-r--r--  Eigen/src/SparseCore/CompressedStorage.h | 61
-rw-r--r--  Eigen/src/SparseCore/ConservativeSparseSparseProduct.h | 13
-rw-r--r--  Eigen/src/SparseCore/MappedSparseMatrix.h | 26
-rw-r--r--  Eigen/src/SparseCore/SparseAssign.h | 192
-rw-r--r--  Eigen/src/SparseCore/SparseBlock.h | 216
-rw-r--r--  Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 316
-rw-r--r--  Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 187
-rw-r--r--  Eigen/src/SparseCore/SparseDenseProduct.h | 346
-rw-r--r--  Eigen/src/SparseCore/SparseDiagonalProduct.h | 239
-rw-r--r--  Eigen/src/SparseCore/SparseDot.h | 17
-rw-r--r--  Eigen/src/SparseCore/SparseFuzzy.h | 29
-rw-r--r--  Eigen/src/SparseCore/SparseMatrix.h | 120
-rw-r--r--  Eigen/src/SparseCore/SparseMatrixBase.h | 170
-rw-r--r--  Eigen/src/SparseCore/SparsePermutation.h | 117
-rw-r--r--  Eigen/src/SparseCore/SparseProduct.h | 220
-rw-r--r--  Eigen/src/SparseCore/SparseRedux.h | 5
-rw-r--r--  Eigen/src/SparseCore/SparseSelfAdjointView.h | 350
-rw-r--r--  Eigen/src/SparseCore/SparseSolverBase.h | 110
-rw-r--r--  Eigen/src/SparseCore/SparseSparseProductWithPruning.h | 58
-rw-r--r--  Eigen/src/SparseCore/SparseTranspose.h | 82
-rw-r--r--  Eigen/src/SparseCore/SparseTriangularView.h | 183
-rw-r--r--  Eigen/src/SparseCore/SparseUtil.h | 28
-rw-r--r--  Eigen/src/SparseCore/SparseVector.h | 46
-rw-r--r--  Eigen/src/SparseCore/SparseView.h | 200
-rw-r--r--  Eigen/src/SparseCore/TriangularSolver.h | 56
-rw-r--r--  Eigen/src/SparseLU/SparseLU.h | 118
-rw-r--r--  Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h | 11
-rw-r--r--  Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 10
-rw-r--r--  Eigen/src/SparseQR/SparseQR.h | 103
-rw-r--r--  Eigen/src/StlSupport/StdDeque.h | 2
-rw-r--r--  Eigen/src/StlSupport/StdList.h | 2
-rw-r--r--  Eigen/src/StlSupport/details.h | 2
-rw-r--r--  Eigen/src/SuperLUSupport/SuperLUSupport.h | 105
-rw-r--r--  Eigen/src/UmfPackSupport/UmfPackSupport.h | 195
-rw-r--r--  Eigen/src/misc/Kernel.h | 2
-rw-r--r--  Eigen/src/misc/Solve.h | 76
-rw-r--r--  Eigen/src/misc/SparseSolve.h | 130
-rw-r--r--  Eigen/src/plugins/ArrayCwiseUnaryOps.h | 78
-rw-r--r--  Eigen/src/plugins/CommonCwiseUnaryOps.h | 37
-rw-r--r--  Eigen/src/plugins/MatrixCwiseUnaryOps.h | 26
-rw-r--r--  bench/btl/libs/eigen2/eigen2_interface.hh | 2
-rw-r--r--  blas/CMakeLists.txt | 27
-rw-r--r--  blas/chbmv.f | 310
-rw-r--r--  blas/chpmv.f | 272
-rw-r--r--  blas/ctbmv.f | 366
-rw-r--r--  blas/drotm.f | 147
-rw-r--r--  blas/drotmg.f | 206
-rw-r--r--  blas/dsbmv.f | 304
-rw-r--r--  blas/dspmv.f | 265
-rw-r--r--  blas/dtbmv.f | 335
-rw-r--r--  blas/f2c/chbmv.c | 487
-rw-r--r--  blas/f2c/chpmv.c | 438
-rw-r--r--  blas/f2c/complexdots.c | 84
-rw-r--r--  blas/f2c/ctbmv.c | 647
-rw-r--r--  blas/f2c/d_cnjg.c | 6
-rw-r--r--  blas/f2c/datatypes.h | 24
-rw-r--r--  blas/f2c/drotm.c | 215
-rw-r--r--  blas/f2c/drotmg.c | 293
-rw-r--r--  blas/f2c/dsbmv.c | 366
-rw-r--r--  blas/f2c/dspmv.c | 316
-rw-r--r--  blas/f2c/dtbmv.c | 428
-rw-r--r--  blas/f2c/lsame.c | 117
-rw-r--r--  blas/f2c/r_cnjg.c | 6
-rw-r--r--  blas/f2c/srotm.c | 216
-rw-r--r--  blas/f2c/srotmg.c | 295
-rw-r--r--  blas/f2c/ssbmv.c | 368
-rw-r--r--  blas/f2c/sspmv.c | 316
-rw-r--r--  blas/f2c/stbmv.c | 428
-rw-r--r--  blas/f2c/zhbmv.c | 488
-rw-r--r--  blas/f2c/zhpmv.c | 438
-rw-r--r--  blas/f2c/ztbmv.c | 647
-rw-r--r--  blas/fortran/complexdots.f (renamed from blas/complexdots.f) | 0
-rw-r--r--  blas/lsame.f | 85
-rw-r--r--  blas/srotm.f | 148
-rw-r--r--  blas/srotmg.f | 208
-rw-r--r--  blas/ssbmv.f | 306
-rw-r--r--  blas/sspmv.f | 265
-rw-r--r--  blas/stbmv.f | 335
-rw-r--r--  blas/zhbmv.f | 310
-rw-r--r--  blas/zhpmv.f | 272
-rw-r--r--  blas/ztbmv.f | 366
-rw-r--r--  cmake/EigenConfigureTesting.cmake | 48
-rw-r--r--  cmake/EigenTesting.cmake | 34
-rw-r--r--  cmake/EigenUninstall.cmake | 40
-rw-r--r--  doc/AsciiQuickReference.txt | 8
-rw-r--r--  doc/Manual.dox | 1
-rw-r--r--  doc/NewExpressionType.dox | 137
-rw-r--r--  doc/SparseQuickReference.dox | 5
-rw-r--r--  doc/examples/make_circulant.cpp | 11
-rw-r--r--  doc/examples/make_circulant.cpp.entry | 5
-rw-r--r--  doc/examples/make_circulant.cpp.evaluator | 33
-rw-r--r--  doc/examples/make_circulant.cpp.expression | 20
-rw-r--r--  doc/examples/make_circulant.cpp.main | 8
-rw-r--r--  doc/examples/make_circulant.cpp.preamble | 4
-rw-r--r--  doc/examples/make_circulant.cpp.traits | 19
-rw-r--r--  failtest/CMakeLists.txt | 15
-rw-r--r--  failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp | 15
-rw-r--r--  failtest/cwiseunaryview_on_const_type_actually_const.cpp | 16
-rw-r--r--  failtest/ref_1.cpp | 18
-rw-r--r--  failtest/ref_2.cpp | 15
-rw-r--r--  failtest/ref_3.cpp | 15
-rw-r--r--  failtest/ref_4.cpp | 15
-rw-r--r--  failtest/ref_5.cpp | 16
-rw-r--r--  failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp | 15
-rw-r--r--  failtest/selfadjointview_on_const_type_actually_const.cpp | 16
-rw-r--r--  failtest/swap_1.cpp | 14
-rw-r--r--  failtest/swap_2.cpp | 14
-rw-r--r--  failtest/triangularview_nonconst_ctor_on_const_xpr.cpp | 15
-rw-r--r--  failtest/triangularview_on_const_type_actually_const.cpp | 16
-rw-r--r--  lapack/complex_double.cpp | 3
-rw-r--r--  lapack/complex_single.cpp | 3
-rw-r--r--  lapack/double.cpp | 3
-rw-r--r--  lapack/eigenvalues.cpp | 23
-rw-r--r--  lapack/lapack_common.h | 7
-rw-r--r--  lapack/single.cpp | 3
-rw-r--r--  lapack/svd.cpp | 137
-rw-r--r--  test/CMakeLists.txt | 47
-rw-r--r--  test/adjoint.cpp | 22
-rw-r--r--  test/array.cpp | 25
-rw-r--r--  test/bdcsvd.cpp | 111
-rw-r--r--  test/block.cpp | 8
-rw-r--r--  test/cholesky.cpp | 12
-rw-r--r--  test/cuda_basic.cu | 6
-rw-r--r--  test/diagonalmatrices.cpp | 7
-rw-r--r--  test/eigensolver_selfadjoint.cpp | 22
-rw-r--r--  test/evaluators.cpp | 142
-rw-r--r--  test/geo_homogeneous.cpp | 7
-rw-r--r--  test/geo_hyperplane.cpp | 29
-rw-r--r--  test/geo_orthomethods.cpp | 14
-rw-r--r--  test/geo_transformations.cpp | 49
-rw-r--r--  test/inverse.cpp | 9
-rw-r--r--  test/jacobisvd.cpp | 413
-rw-r--r--  test/linearstructure.cpp | 18
-rw-r--r--  test/main.h | 33
-rw-r--r--  test/mixingtypes.cpp | 9
-rw-r--r--  test/nesting_ops.cpp | 2
-rw-r--r--  test/nomalloc.cpp | 61
-rw-r--r--  test/nullary.cpp | 4
-rw-r--r--  test/packetmath.cpp | 24
-rw-r--r--  test/product_mmtr.cpp | 3
-rw-r--r--  test/product_notemporary.cpp | 3
-rw-r--r--  test/product_small.cpp | 10
-rw-r--r--  test/qr_fullpivoting.cpp | 6
-rw-r--r--  test/ref.cpp | 8
-rw-r--r--  test/sparse_basic.cpp | 214
-rw-r--r--  test/sparse_product.cpp | 44
-rw-r--r--  test/sparse_solver.h | 70
-rw-r--r--  test/sparse_vector.cpp | 19
-rw-r--r--  test/sparselu.cpp | 3
-rw-r--r--  test/stable_norm.cpp | 30
-rw-r--r--  test/svd_common.h | 493
-rw-r--r--  test/swap.cpp | 6
-rw-r--r--  test/vectorization_logic.cpp | 49
-rw-r--r--  test/vectorwiseop.cpp | 4
-rw-r--r--  unsupported/Eigen/AlignedVector3 | 25
-rw-r--r--  unsupported/Eigen/BDCSVD | 26
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 3
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 16
-rw-r--r--  unsupported/Eigen/IterativeSolvers | 3
-rw-r--r--  unsupported/Eigen/MPRealSupport | 4
-rw-r--r--  unsupported/Eigen/OpenGLSupport | 38
-rw-r--r--  unsupported/Eigen/SparseExtra | 3
-rw-r--r--  unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 1
-rw-r--r--  unsupported/Eigen/src/BDCSVD/BDCSVD.h | 949
-rw-r--r--  unsupported/Eigen/src/BDCSVD/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/BDCSVD/TODOBdcsvd.txt | 29
-rw-r--r--  unsupported/Eigen/src/BDCSVD/doneInBDCSVD.txt | 21
-rw-r--r--  unsupported/Eigen/src/CMakeLists.txt | 1
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/DGMRES.h | 39
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/GMRES.h | 41
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h | 37
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h | 39
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/MINRES.h | 46
-rw-r--r--  unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h | 56
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h | 9
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h | 5
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h | 11
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h | 28
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixPower.h | 4
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h | 8
-rw-r--r--  unsupported/Eigen/src/Polynomials/PolynomialUtils.h | 2
-rw-r--r--  unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h | 32
-rw-r--r--  unsupported/test/CMakeLists.txt | 8
-rw-r--r--  unsupported/test/NonLinearOptimization.cpp | 16
-rw-r--r--  unsupported/test/bdcsvd.cpp | 213
-rw-r--r--  unsupported/test/jacobisvd.cpp | 198
-rw-r--r--  unsupported/test/kronecker_product.cpp | 12
-rw-r--r--  unsupported/test/svd_common.h | 261
345 files changed, 19381 insertions, 15099 deletions
diff --git a/.hgignore b/.hgignore
index e33ba2e9d..769a47f1f 100644
--- a/.hgignore
+++ b/.hgignore
@@ -30,3 +30,5 @@ log
patch
a
a.*
+lapack/testing
+lapack/reference
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 96d6c8701..00287c9bb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
project(Eigen)
-cmake_minimum_required(VERSION 2.8.2)
+cmake_minimum_required(VERSION 2.8.4)
# guard against in-source builds
@@ -219,12 +219,26 @@ if(NOT MSVC)
message(STATUS "Enabling AltiVec in tests/examples")
endif()
+ option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF)
+ if(EIGEN_TEST_VSX)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx")
+ message(STATUS "Enabling VSX in tests/examples")
+ endif()
+
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
message(STATUS "Enabling NEON in tests/examples")
endif()
+ option(EIGEN_TEST_NEON64 "Enable/Disable Neon in tests/examples" OFF)
+ if(EIGEN_TEST_NEON64)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ message(STATUS "Enabling NEON in tests/examples")
+ endif()
+
+
+
check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
if(COMPILER_SUPPORT_OPENMP)
option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF)
@@ -434,6 +448,7 @@ if(cmake_generator_tolower MATCHES "makefile")
message(STATUS "make check | Build and run the unit-tests. Read this page:")
message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests")
message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)")
+ message(STATUS "make uninstall| Removes files installed by make install")
message(STATUS "--------------+--------------------------------------------------------------")
else()
message(STATUS "To build/run the unit tests, read this page:")
@@ -469,3 +484,7 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
)
+
+# Add uninstall target
+add_custom_target ( uninstall
+ COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake)
diff --git a/Eigen/Cholesky b/Eigen/Cholesky
index 7314d326c..dd0ca911c 100644
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -21,7 +21,6 @@
* \endcode
*/
-#include "src/misc/Solve.h"
#include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE
diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport
index 745b884e7..687cd9777 100644
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -33,12 +33,8 @@ extern "C" {
*
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
#include "src/CholmodSupport/CholmodSupport.h"
-
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_CHOLMODSUPPORT_MODULE_H
diff --git a/Eigen/Core b/Eigen/Core
index acdeca5f4..0263caf20 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -57,7 +57,7 @@
// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
-#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6)
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
#pragma GCC optimize ("-fno-ipa-cp-clone")
#endif
@@ -75,18 +75,18 @@
#endif
#endif
-#ifdef _MSC_VER
+#if EIGEN_COMP_MSVC
#include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
- #if (_MSC_VER >= 1500) // 2008 or later
+ #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
// Remember that usage of defined() in a #define is undefined by the standard.
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
- #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
+ #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#else
// Remember that usage of defined() in a #define is undefined by the standard
- #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
+ #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
@@ -125,9 +125,7 @@
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
- #ifdef __FMA__
- #define EIGEN_VECTORIZE_FMA
- #endif
+
// include files
// This extern "C" works around a MINGW-w64 compilation issue
@@ -140,7 +138,7 @@
extern "C" {
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
// Doing so triggers some issues with ICC. However, old gcc versions seem to not have this file, thus:
- #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110
+ #if EIGEN_COMP_ICC >= 1110
#include <immintrin.h>
#else
#include <emmintrin.h>
@@ -162,6 +160,15 @@
#endif
#endif
} // end extern "C"
+ #elif defined __VSX__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_VSX
+ #include <altivec.h>
+ // We need to #undef all these ugly tokens defined in <altivec.h>
+ // => use __vector instead of vector
+ #undef bool
+ #undef vector
+ #undef pixel
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
@@ -171,7 +178,7 @@
#undef bool
#undef vector
#undef pixel
- #elif defined __ARM_NEON__
+ #elif defined __ARM_NEON
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>
@@ -192,7 +199,7 @@
#endif
// MSVC for windows mobile does not have the errno.h file
-#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION)
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
#define EIGEN_HAS_ERRNO
#endif
@@ -218,7 +225,7 @@
#endif
// required for __cpuid, needs to be included after cmath
-#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE))
+#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
#include <intrin.h>
#endif
@@ -240,6 +247,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_VSX)
+ return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
return "ARM NEON";
#else
@@ -271,8 +280,8 @@ using std::ptrdiff_t;
*/
#include "src/Core/util/Constants.h"
-#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/Meta.h"
+#include "src/Core/util/ForwardDeclarations.h"
#include "src/Core/util/StaticAssert.h"
#include "src/Core/util/XprHelper.h"
#include "src/Core/util/Memory.h"
@@ -291,7 +300,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/SSE/Complex.h"
-#elif defined EIGEN_VECTORIZE_ALTIVEC
+#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
#include "src/Core/arch/AltiVec/PacketMath.h"
#include "src/Core/arch/AltiVec/Complex.h"
#elif defined EIGEN_VECTORIZE_NEON
@@ -310,19 +319,16 @@ using std::ptrdiff_t;
#include "src/Core/functors/UnaryFunctors.h"
#include "src/Core/functors/NullaryFunctors.h"
#include "src/Core/functors/StlFunctors.h"
+#include "src/Core/functors/AssignmentFunctors.h"
#include "src/Core/DenseCoeffsBase.h"
#include "src/Core/DenseBase.h"
#include "src/Core/MatrixBase.h"
#include "src/Core/EigenBase.h"
-#ifdef EIGEN_ENABLE_EVALUATORS
-#include "src/Core/functors/AssignmentFunctors.h"
#include "src/Core/Product.h"
#include "src/Core/CoreEvaluators.h"
#include "src/Core/AssignEvaluator.h"
-#include "src/Core/ProductEvaluators.h"
-#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
// at least confirmed with Doxygen 1.5.5 and 1.5.6
@@ -333,7 +339,10 @@ using std::ptrdiff_t;
#include "src/Core/util/BlasUtil.h"
#include "src/Core/DenseStorage.h"
#include "src/Core/NestByValue.h"
-#include "src/Core/ForceAlignedAccess.h"
+
+// #include "src/Core/ForceAlignedAccess.h"
+// #include "src/Core/Flagged.h"
+
#include "src/Core/ReturnByValue.h"
#include "src/Core/NoAlias.h"
#include "src/Core/PlainObjectBase.h"
@@ -346,12 +355,12 @@ using std::ptrdiff_t;
#include "src/Core/SelfCwiseBinaryOp.h"
#include "src/Core/Dot.h"
#include "src/Core/StableNorm.h"
-#include "src/Core/MapBase.h"
#include "src/Core/Stride.h"
+#include "src/Core/MapBase.h"
#include "src/Core/Map.h"
+#include "src/Core/Ref.h"
#include "src/Core/Block.h"
#include "src/Core/VectorBlock.h"
-#include "src/Core/Ref.h"
#include "src/Core/Transpose.h"
#include "src/Core/DiagonalMatrix.h"
#include "src/Core/Diagonal.h"
@@ -364,14 +373,15 @@ using std::ptrdiff_t;
#include "src/Core/IO.h"
#include "src/Core/Swap.h"
#include "src/Core/CommaInitializer.h"
-#include "src/Core/Flagged.h"
#include "src/Core/ProductBase.h"
#include "src/Core/GeneralProduct.h"
+#include "src/Core/Solve.h"
+#include "src/Core/Inverse.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
-#include "src/Core/products/CoeffBasedProduct.h"
+#include "src/Core/ProductEvaluators.h"
#include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/GeneralMatrixMatrix.h"
#include "src/Core/SolveTriangular.h"
diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers
index 0f4159dc1..c06668bd2 100644
--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -26,9 +26,7 @@
* \endcode
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
+#include "src/IterativeLinearSolvers/SolveWithGuess.h"
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
diff --git a/Eigen/LU b/Eigen/LU
index 29a98cb9a..132ecc42c 100644
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -16,7 +16,6 @@
* \endcode
*/
-#include "src/misc/Solve.h"
#include "src/misc/Kernel.h"
#include "src/misc/Image.h"
#include "src/LU/FullPivLU.h"
@@ -25,7 +24,7 @@
#include "src/LU/PartialPivLU_MKL.h"
#endif
#include "src/LU/Determinant.h"
-#include "src/LU/Inverse.h"
+#include "src/LU/InverseImpl.h"
// Use the SSE optimized version whenever possible. At the moment the
// SSE version doesn't compile when AVX is enabled
diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport
index 7c616ee5e..e7d275f97 100644
--- a/Eigen/PaStiXSupport
+++ b/Eigen/PaStiXSupport
@@ -35,12 +35,8 @@ extern "C" {
*
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
#include "src/PaStiXSupport/PaStiXSupport.h"
-
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_PASTIXSUPPORT_MODULE_H
diff --git a/Eigen/QR b/Eigen/QR
index 4c2533610..230cb079a 100644
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -24,7 +24,6 @@
* \endcode
*/
-#include "src/misc/Solve.h"
#include "src/QR/HouseholderQR.h"
#include "src/QR/FullPivHouseholderQR.h"
#include "src/QR/ColPivHouseholderQR.h"
diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport
index 77016442e..e3f49bb5a 100644
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -21,8 +21,6 @@
*
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
#include "src/CholmodSupport/CholmodSupport.h"
#include "src/SPQRSupport/SuiteSparseQRSupport.h"
diff --git a/Eigen/SVD b/Eigen/SVD
index c3d24286c..dbd37b17a 100644
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -12,21 +12,25 @@
*
*
* This module provides SVD decomposition for matrices (both real and complex).
- * This decomposition is accessible via the following MatrixBase method:
+ * Two decomposition algorithms are provided:
+ * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones.
+ * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems.
+ * These decompositions are accessible via the respective classes and following MatrixBase methods:
* - MatrixBase::jacobiSvd()
+ * - MatrixBase::bdcSvd()
*
* \code
* #include <Eigen/SVD>
* \endcode
*/
-#include "src/misc/Solve.h"
+#include "src/SVD/UpperBidiagonalization.h"
#include "src/SVD/SVDBase.h"
#include "src/SVD/JacobiSVD.h"
+#include "src/SVD/BDCSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#include "src/SVD/JacobiSVD_MKL.h"
#endif
-#include "src/SVD/UpperBidiagonalization.h"
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/SparseCholesky b/Eigen/SparseCholesky
index 9f5056aa1..b6a320c40 100644
--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -34,8 +34,6 @@
#error The SparseCholesky module has nothing to offer in MPL2 only mode
#endif
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
#include "src/SparseCholesky/SimplicialCholesky.h"
#ifndef EIGEN_MPL2_ONLY
diff --git a/Eigen/SparseCore b/Eigen/SparseCore
index 9b5be5e15..d5c0f6271 100644
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -26,37 +26,32 @@
* This module depends on: Core.
*/
-namespace Eigen {
-
-/** The type used to identify a general sparse storage. */
-struct Sparse {};
-
-}
-
#include "src/SparseCore/SparseUtil.h"
#include "src/SparseCore/SparseMatrixBase.h"
+#include "src/SparseCore/SparseAssign.h"
#include "src/SparseCore/CompressedStorage.h"
#include "src/SparseCore/AmbiVector.h"
#include "src/SparseCore/SparseMatrix.h"
#include "src/SparseCore/MappedSparseMatrix.h"
#include "src/SparseCore/SparseVector.h"
-#include "src/SparseCore/SparseBlock.h"
-#include "src/SparseCore/SparseTranspose.h"
#include "src/SparseCore/SparseCwiseUnaryOp.h"
#include "src/SparseCore/SparseCwiseBinaryOp.h"
+#include "src/SparseCore/SparseTranspose.h"
+#include "src/SparseCore/SparseBlock.h"
#include "src/SparseCore/SparseDot.h"
-#include "src/SparseCore/SparsePermutation.h"
#include "src/SparseCore/SparseRedux.h"
-#include "src/SparseCore/SparseFuzzy.h"
+#include "src/SparseCore/SparseView.h"
+#include "src/SparseCore/SparseDiagonalProduct.h"
#include "src/SparseCore/ConservativeSparseSparseProduct.h"
#include "src/SparseCore/SparseSparseProductWithPruning.h"
#include "src/SparseCore/SparseProduct.h"
#include "src/SparseCore/SparseDenseProduct.h"
-#include "src/SparseCore/SparseDiagonalProduct.h"
-#include "src/SparseCore/SparseTriangularView.h"
#include "src/SparseCore/SparseSelfAdjointView.h"
+#include "src/SparseCore/SparseTriangularView.h"
#include "src/SparseCore/TriangularSolver.h"
-#include "src/SparseCore/SparseView.h"
+#include "src/SparseCore/SparsePermutation.h"
+#include "src/SparseCore/SparseFuzzy.h"
+#include "src/SparseCore/SparseSolverBase.h"
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/SparseLU b/Eigen/SparseLU
index 8527a49bd..38b38b531 100644
--- a/Eigen/SparseLU
+++ b/Eigen/SparseLU
@@ -20,9 +20,6 @@
* Please, see the documentation of the SparseLU class for more details.
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
// Ordering interface
#include "OrderingMethods"
diff --git a/Eigen/SparseQR b/Eigen/SparseQR
index 4ee42065e..efb2695ba 100644
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -21,9 +21,6 @@
*
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
#include "OrderingMethods"
#include "src/SparseCore/SparseColEtree.h"
#include "src/SparseQR/SparseQR.h"
diff --git a/Eigen/StdDeque b/Eigen/StdDeque
index f27234778..be3a7f82b 100644
--- a/Eigen/StdDeque
+++ b/Eigen/StdDeque
@@ -14,7 +14,7 @@
#include "Core"
#include <deque>
-#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
+#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
diff --git a/Eigen/StdList b/Eigen/StdList
index 225c1e18f..07ba1297b 100644
--- a/Eigen/StdList
+++ b/Eigen/StdList
@@ -13,7 +13,7 @@
#include "Core"
#include <list>
-#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
+#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...)
diff --git a/Eigen/StdVector b/Eigen/StdVector
index 6b22627f6..fdfc37766 100644
--- a/Eigen/StdVector
+++ b/Eigen/StdVector
@@ -14,7 +14,7 @@
#include "Core"
#include <vector>
-#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
+#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...)
diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport
index 575e14fbc..d1eac9464 100644
--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -48,12 +48,8 @@ namespace Eigen { struct SluMatrix; }
*
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
#include "src/SuperLUSupport/SuperLUSupport.h"
-
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SUPERLUSUPPORT_MODULE_H
diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport
index 984f64a84..0efad5dee 100644
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -26,9 +26,6 @@ extern "C" {
*
*/
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
#include "src/UmfPackSupport/UmfPackSupport.h"
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index aa9784e54..5acbf4651 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -85,7 +85,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
* according to the specified problem \a size.
* \sa LDLT()
*/
- LDLT(Index size)
+ explicit LDLT(Index size)
: m_matrix(size, size),
m_transpositions(size),
m_temporary(size),
@@ -98,7 +98,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
* This calculates the decomposition for the input \a matrix.
* \sa LDLT(Index size)
*/
- LDLT(const MatrixType& matrix)
+ explicit LDLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols()),
m_transpositions(matrix.rows()),
m_temporary(matrix.rows()),
@@ -175,13 +175,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
*/
template<typename Rhs>
- inline const internal::solve_retval<LDLT, Rhs>
+ inline const Solve<LDLT, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
eigen_assert(m_matrix.rows()==b.rows()
&& "LDLT::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
+ return Solve<LDLT, Rhs>(*this, b.derived());
}
template<typename Derived>
@@ -217,6 +217,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return Success;
}
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
@@ -400,16 +406,16 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
{
typedef const TriangularView<const MatrixType, UnitLower> MatrixL;
typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return m; }
- static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+ static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
+ static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
};
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
{
typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
- static inline MatrixU getU(const MatrixType& m) { return m; }
+ static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
+ static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
};
} // end namespace internal
@@ -427,6 +433,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
m_transpositions.resize(size);
m_isInitialized = false;
m_temporary.resize(size);
+ m_sign = internal::ZeroSign;
internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
@@ -466,52 +473,45 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
return *this;
}
-namespace internal {
-template<typename _MatrixType, int _UpLo, typename Rhs>
-struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
- : solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType, int _UpLo>
+template<typename RhsType, typename DstType>
+void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
- typedef LDLT<_MatrixType,_UpLo> LDLTType;
- EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
+ eigen_assert(rhs.rows() == rows());
+ // dst = P b
+ dst = m_transpositions * rhs;
+
+ // dst = L^-1 (P b)
+ matrixL().solveInPlace(dst);
+
+ // dst = D^-1 (L^-1 P b)
+ // more precisely, use pseudo-inverse of D (see bug 241)
+ using std::abs;
+ const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
+ // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
+ // as motivated by LAPACK's xGELSS:
+ // RealScalar tolerance = numext::maxi(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
+ // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
+ * diagonal element is not well justified and leads to numerical issues in some cases.
+ // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
+ RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
+
+ for (Index i = 0; i < vecD.size(); ++i)
{
- eigen_assert(rhs().rows() == dec().matrixLDLT().rows());
- // dst = P b
- dst = dec().transpositionsP() * rhs();
-
- // dst = L^-1 (P b)
- dec().matrixL().solveInPlace(dst);
-
- // dst = D^-1 (L^-1 P b)
- // more precisely, use pseudo-inverse of D (see bug 241)
- using std::abs;
- EIGEN_USING_STD_MATH(max);
- typedef typename LDLTType::MatrixType MatrixType;
- typedef typename LDLTType::RealScalar RealScalar;
- const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
- // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
- // as motivated by LAPACK's xGELSS:
- // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
- // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
- // diagonal element is not well justified and to numerical issues in some cases.
- // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
- RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
- for (Index i = 0; i < vectorD.size(); ++i) {
- if(abs(vectorD(i)) > tolerance)
- dst.row(i) /= vectorD(i);
- else
- dst.row(i).setZero();
- }
+ if(abs(vecD(i)) > tolerance)
+ dst.row(i) /= vecD(i);
+ else
+ dst.row(i).setZero();
+ }
- // dst = L^-T (D^-1 L^-1 P b)
- dec().matrixU().solveInPlace(dst);
+ // dst = L^-T (D^-1 L^-1 P b)
+ matrixU().solveInPlace(dst);
- // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
- dst = dec().transpositionsP().transpose() * dst;
- }
-};
+ // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
+ dst = m_transpositions.transpose() * dst;
}
+#endif
/** \internal use x = ldlt_object.solve(x);
*
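
Call sites are unaffected by the switch from internal::solve_retval to the Solve<LDLT, Rhs> expression above; only the constructors become explicit. A small usage sketch with a positive semidefinite matrix:

    #include <iostream>
    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd M = Eigen::MatrixXd::Random(5, 5);
      Eigen::MatrixXd A = M * M.transpose();   // self-adjoint, positive semidefinite
      Eigen::VectorXd b = Eigen::VectorXd::Random(5);

      Eigen::LDLT<Eigen::MatrixXd> ldlt(A);    // constructor is now explicit
      Eigen::VectorXd x = ldlt.solve(b);       // returns a Solve<> expression,
                                               // evaluated via _solve_impl on assignment
      std::cout << "residual: " << (A * x - b).norm() << std::endl;
      return 0;
    }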
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 38e820165..90194e64d 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -83,10 +83,10 @@ template<typename _MatrixType, int _UpLo> class LLT
* according to the specified problem \a size.
* \sa LLT()
*/
- LLT(Index size) : m_matrix(size, size),
+ explicit LLT(Index size) : m_matrix(size, size),
m_isInitialized(false) {}
- LLT(const MatrixType& matrix)
+ explicit LLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols()),
m_isInitialized(false)
{
@@ -118,13 +118,13 @@ template<typename _MatrixType, int _UpLo> class LLT
* \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
*/
template<typename Rhs>
- inline const internal::solve_retval<LLT, Rhs>
+ inline const Solve<LLT, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_matrix.rows()==b.rows()
&& "LLT::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<LLT, Rhs>(*this, b.derived());
+ return Solve<LLT, Rhs>(*this, b.derived());
}
template<typename Derived>
@@ -161,6 +161,12 @@ template<typename _MatrixType, int _UpLo> class LLT
template<typename VectorType>
LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
/** \internal
@@ -345,8 +351,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
{
typedef const TriangularView<const MatrixType, Lower> MatrixL;
typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return m; }
- static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+ static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
+ static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
};
@@ -355,8 +361,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
{
typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
typedef const TriangularView<const MatrixType, Upper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
- static inline MatrixU getU(const MatrixType& m) { return m; }
+ static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
+ static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
static bool inplace_decomposition(MatrixType& m)
{ return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
};
@@ -404,22 +410,16 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c
return *this;
}
-
-namespace internal {
-template<typename _MatrixType, int UpLo, typename Rhs>
-struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
- : solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType,int _UpLo>
+template<typename RhsType, typename DstType>
+void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
- typedef LLT<_MatrixType,UpLo> LLTType;
- EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dst = rhs();
- dec().solveInPlace(dst);
- }
-};
+ dst = rhs;
+ solveInPlace(dst);
}
+#endif
/** \internal use x = llt_object.solve(x);
*
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index c449960de..3eadb83a0 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -105,7 +105,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>&
/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix.
* The data are not copied but shared. */
template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
-cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
+cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
{
cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
@@ -157,8 +157,12 @@ enum CholmodMode {
* \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
*/
template<typename _MatrixType, int _UpLo, typename Derived>
-class CholmodBase : internal::noncopyable
+class CholmodBase : public SparseSolverBase<Derived>
{
+ protected:
+ typedef SparseSolverBase<Derived> Base;
+ using Base::derived;
+ using Base::m_isInitialized;
public:
typedef _MatrixType MatrixType;
enum { UpLo = _UpLo };
@@ -170,14 +174,14 @@ class CholmodBase : internal::noncopyable
public:
CholmodBase()
- : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+ : m_cholmodFactor(0), m_info(Success)
{
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
cholmod_start(&m_cholmod);
}
- CholmodBase(const MatrixType& matrix)
- : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+ explicit CholmodBase(const MatrixType& matrix)
+ : m_cholmodFactor(0), m_info(Success)
{
m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
cholmod_start(&m_cholmod);
@@ -194,9 +198,6 @@ class CholmodBase : internal::noncopyable
inline Index cols() const { return m_cholmodFactor->n; }
inline Index rows() const { return m_cholmodFactor->n; }
- Derived& derived() { return *static_cast<Derived*>(this); }
- const Derived& derived() const { return *static_cast<const Derived*>(this); }
-
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was successful,
@@ -216,34 +217,6 @@ class CholmodBase : internal::noncopyable
return derived();
}
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<CholmodBase, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "LLT is not initialized.");
- eigen_assert(rows()==b.rows()
- && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<CholmodBase, Rhs>(*this, b.derived());
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<CholmodBase, Rhs>
- solve(const SparseMatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "LLT is not initialized.");
- eigen_assert(rows()==b.rows()
- && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
- }
-
/** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
*
* This function is particularly useful when solving for several problems having the same structure.
@@ -290,7 +263,7 @@ class CholmodBase : internal::noncopyable
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
{
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
const Index size = m_cholmodFactor->n;
@@ -312,7 +285,7 @@ class CholmodBase : internal::noncopyable
/** \internal */
template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+ void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
{
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
const Index size = m_cholmodFactor->n;
@@ -357,7 +330,6 @@ class CholmodBase : internal::noncopyable
cholmod_factor* m_cholmodFactor;
RealScalar m_shiftOffset[2];
mutable ComputationInfo m_info;
- bool m_isInitialized;
int m_factorizationIsOk;
int m_analysisIsOk;
};
@@ -572,36 +544,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
}
};
-namespace internal {
-
-template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
-struct solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
- : solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-{
- typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
-struct sparse_solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
- : sparse_solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-{
- typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_CHOLMODSUPPORT_H
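
CholmodBase now inherits solve() from SparseSolverBase and only implements _solve_impl() for dense and sparse right-hand sides, replacing the deleted solve_retval/sparse_solve_retval machinery. The user-facing pattern is the same for every solver built on this base; a sketch using the built-in SimplicialLDLT, so that no CHOLMOD installation is assumed:

    #include <iostream>
    #include <vector>
    #include <Eigen/Sparse>

    int main()
    {
      typedef Eigen::SparseMatrix<double> SpMat;
      const int n = 10;

      // Build a symmetric positive definite tridiagonal matrix.
      std::vector<Eigen::Triplet<double> > entries;
      for (int i = 0; i < n; ++i) {
        entries.push_back(Eigen::Triplet<double>(i, i, 2.0));
        if (i > 0)     entries.push_back(Eigen::Triplet<double>(i, i - 1, -1.0));
        if (i < n - 1) entries.push_back(Eigen::Triplet<double>(i, i + 1, -1.0));
      }
      SpMat A(n, n);
      A.setFromTriplets(entries.begin(), entries.end());

      Eigen::SimplicialLDLT<SpMat> solver(A);
      Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
      Eigen::VectorXd x = solver.solve(b);     // dense rhs, dispatched to _solve_impl
      std::cout << "residual: " << (A * x - b).norm() << std::endl;
      return 0;
    }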
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 28d6f1443..337086615 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -74,6 +74,21 @@ class Array
{
return Base::operator=(other);
}
+
+ /** Set all the entries to \a value.
+ * \sa DenseBase::setConstant(), DenseBase::fill()
+ */
+ /* This overload is needed because the usage of
+ * using Base::operator=;
+ * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
+ * the usage of 'using'. This should be done only for operator=.
+ */
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
+ {
+ Base::setConstant(value);
+ return *this;
+ }
/** Copies the value of the expression \a other into \c *this with automatic resizing.
*
@@ -99,7 +114,7 @@ class Array
{
return Base::_set(other);
}
-
+
/** Default constructor.
*
* For fixed-size matrices, does nothing.
@@ -144,7 +159,6 @@ class Array
}
#endif
-
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T>
EIGEN_DEVICE_FUNC
@@ -244,13 +258,6 @@ class Array
*this = other;
}
- /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
- * data pointers.
- */
- template<typename OtherDerived>
- void swap(ArrayBase<OtherDerived> const & other)
- { this->_swap(other.derived()); }
-
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 2c9ace4a7..d42693d4b 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -64,8 +64,7 @@ template<typename Derived> class ArrayBase
using Base::MaxSizeAtCompileTime;
using Base::IsVectorAtCompileTime;
using Base::Flags;
- using Base::CoeffReadCost;
-
+
using Base::derived;
using Base::const_cast_derived;
using Base::rows;
@@ -121,8 +120,15 @@ template<typename Derived> class ArrayBase
EIGEN_DEVICE_FUNC
Derived& operator=(const ArrayBase& other)
{
- return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
+
+ /** Set all the entries to \a value.
+ * \sa DenseBase::setConstant(), DenseBase::fill() */
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const Scalar &value)
+ { Base::setConstant(value); return derived(); }
EIGEN_DEVICE_FUNC
Derived& operator+=(const Scalar& scalar);
@@ -153,9 +159,9 @@ template<typename Derived> class ArrayBase
/** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
* \sa MatrixBase::array() */
EIGEN_DEVICE_FUNC
- MatrixWrapper<Derived> matrix() { return derived(); }
+ MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
EIGEN_DEVICE_FUNC
- const MatrixWrapper<const Derived> matrix() const { return derived(); }
+ const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
// template<typename Dest>
// inline void evalTo(Dest& dst) const { dst = matrix(); }
@@ -186,8 +192,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{
- SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
- tmp = other.derived();
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
return derived();
}
@@ -200,8 +205,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{
- SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
- tmp = other.derived();
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
return derived();
}
@@ -214,8 +218,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
{
- SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
- tmp = other.derived();
+ call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -228,8 +231,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{
- SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
- tmp = other.derived();
+ call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar>());
return derived();
}
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index 28d7b7bd5..0b89c58cb 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -44,6 +44,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef ArrayBase<ArrayWrapper> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+ typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
@@ -54,7 +55,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+ explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); }
@@ -186,6 +187,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
+ typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
typedef typename internal::conditional<
internal::is_lvalue<ExpressionType>::value,
@@ -196,7 +198,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
EIGEN_DEVICE_FUNC
- inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
+ explicit inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); }
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 07da2fe31..53806ba33 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -14,485 +14,6 @@
namespace Eigen {
-namespace internal {
-
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for traversal and unrolling *
-***************************************************************************/
-
-template <typename Derived, typename OtherDerived>
-struct assign_traits
-{
-public:
- enum {
- DstIsAligned = Derived::Flags & AlignedBit,
- DstHasDirectAccess = Derived::Flags & DirectAccessBit,
- SrcIsAligned = OtherDerived::Flags & AlignedBit,
- JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
- };
-
-private:
- enum {
- InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
- : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
- : int(Derived::RowsAtCompileTime),
- InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
- : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
- : int(Derived::MaxRowsAtCompileTime),
- MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
- PacketSize = packet_traits<typename Derived::Scalar>::size
- };
-
- enum {
- StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
- MightVectorize = StorageOrdersAgree
- && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
- MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
- && int(DstIsAligned) && int(SrcIsAligned),
- MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
- MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
- && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
- /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
- so it's only good for large enough sizes. */
- MaySliceVectorize = MightVectorize && DstHasDirectAccess
- && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
- /* slice vectorization can be slow, so we only want it if the slices are big, which is
- indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
- in a fixed-size matrix */
- };
-
-public:
- enum {
- Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
- : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
- : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
- : int(MayLinearize) ? int(LinearTraversal)
- : int(DefaultTraversal),
- Vectorized = int(Traversal) == InnerVectorizedTraversal
- || int(Traversal) == LinearVectorizedTraversal
- || int(Traversal) == SliceVectorizedTraversal
- };
-
-private:
- enum {
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
- MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
- && int(OtherDerived::CoeffReadCost) != Dynamic
- && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
- MayUnrollInner = int(InnerSize) != Dynamic
- && int(OtherDerived::CoeffReadCost) != Dynamic
- && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
- };
-
-public:
- enum {
- Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
- ? (
- int(MayUnrollCompletely) ? int(CompleteUnrolling)
- : int(MayUnrollInner) ? int(InnerUnrolling)
- : int(NoUnrolling)
- )
- : int(Traversal) == int(LinearVectorizedTraversal)
- ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
- : int(Traversal) == int(LinearTraversal)
- ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
- : int(NoUnrolling)
- };
-
-#ifdef EIGEN_DEBUG_ASSIGN
- static void debug()
- {
- EIGEN_DEBUG_VAR(DstIsAligned)
- EIGEN_DEBUG_VAR(SrcIsAligned)
- EIGEN_DEBUG_VAR(JointAlignment)
- EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime)
- EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost)
- EIGEN_DEBUG_VAR(InnerSize)
- EIGEN_DEBUG_VAR(InnerMaxSize)
- EIGEN_DEBUG_VAR(PacketSize)
- EIGEN_DEBUG_VAR(StorageOrdersAgree)
- EIGEN_DEBUG_VAR(MightVectorize)
- EIGEN_DEBUG_VAR(MayLinearize)
- EIGEN_DEBUG_VAR(MayInnerVectorize)
- EIGEN_DEBUG_VAR(MayLinearVectorize)
- EIGEN_DEBUG_VAR(MaySliceVectorize)
- EIGEN_DEBUG_VAR(Traversal)
- EIGEN_DEBUG_VAR(UnrollingLimit)
- EIGEN_DEBUG_VAR(MayUnrollCompletely)
- EIGEN_DEBUG_VAR(MayUnrollInner)
- EIGEN_DEBUG_VAR(Unrolling)
- }
-#endif
-};
-
-/***************************************************************************
-* Part 2 : meta-unrollers
-***************************************************************************/
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_DefaultTraversal_CompleteUnrolling
-{
- enum {
- outer = Index / Derived1::InnerSizeAtCompileTime,
- inner = Index % Derived1::InnerSizeAtCompileTime
- };
-
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- dst.copyCoeffByOuterInner(outer, inner, src);
- assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_DefaultTraversal_InnerUnrolling
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
- {
- dst.copyCoeffByOuterInner(outer, Index, src);
- assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
- }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_LinearTraversal_CompleteUnrolling
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- dst.copyCoeff(Index, src);
- assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_innervec_CompleteUnrolling
-{
- enum {
- outer = Index / Derived1::InnerSizeAtCompileTime,
- inner = Index % Derived1::InnerSizeAtCompileTime,
- JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
- };
-
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
- assign_innervec_CompleteUnrolling<Derived1, Derived2,
- Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
-{
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_innervec_InnerUnrolling
-{
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
- {
- dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
- assign_innervec_InnerUnrolling<Derived1, Derived2,
- Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
- }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
-{
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-template<typename Derived1, typename Derived2,
- int Traversal = assign_traits<Derived1, Derived2>::Traversal,
- int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
- int Version = Specialized>
-struct assign_impl;
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename Derived1, typename Derived2, int Unrolling, int Version>
-struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
-{
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &, const Derived2 &) { }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- const Index innerSize = dst.innerSize();
- const Index outerSize = dst.outerSize();
- for(Index outer = 0; outer < outerSize; ++outer)
- for(Index inner = 0; inner < innerSize; ++inner)
- dst.copyCoeffByOuterInner(outer, inner, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
- ::run(dst, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- const Index outerSize = dst.outerSize();
- for(Index outer = 0; outer < outerSize; ++outer)
- assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
- ::run(dst, src, outer);
- }
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- const Index size = dst.size();
- for(Index i = 0; i < size; ++i)
- dst.copyCoeff(i, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
-{
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
- ::run(dst, src);
- }
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- const Index innerSize = dst.innerSize();
- const Index outerSize = dst.outerSize();
- const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
- for(Index outer = 0; outer < outerSize; ++outer)
- for(Index inner = 0; inner < innerSize; inner+=packetSize)
- dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
-{
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
- ::run(dst, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- const Index outerSize = dst.outerSize();
- for(Index outer = 0; outer < outerSize; ++outer)
- assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
- ::run(dst, src, outer);
- }
-};
-
-/***************************
-*** Linear vectorization ***
-***************************/
-
-template <bool IsAligned = false>
-struct unaligned_assign_impl
-{
- template <typename Derived, typename OtherDerived>
- static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
-};
-
-template <>
-struct unaligned_assign_impl<false>
-{
- // MSVC must not inline this functions. If it does, it fails to optimize the
- // packet access path.
-#ifdef _MSC_VER
- template <typename Derived, typename OtherDerived>
- static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
-#else
- template <typename Derived, typename OtherDerived>
- static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
-#endif
- {
- for (typename Derived::Index index = start; index < end; ++index)
- dst.copyCoeff(index, src);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- const Index size = dst.size();
- typedef packet_traits<typename Derived1::Scalar> PacketTraits;
- enum {
- packetSize = PacketTraits::size,
- dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
- srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
- };
- const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
- : internal::first_aligned(&dst.coeffRef(0), size);
- const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
-
- unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
-
- for(Index index = alignedStart; index < alignedEnd; index += packetSize)
- {
- dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
- }
-
- unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
- }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
- {
- enum { size = Derived1::SizeAtCompileTime,
- packetSize = packet_traits<typename Derived1::Scalar>::size,
- alignedSize = (size/packetSize)*packetSize };
-
- assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
- assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
- }
-};
-
-/**************************
-*** Slice vectorization ***
-***************************/
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
-{
- typedef typename Derived1::Index Index;
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- typedef packet_traits<typename Derived1::Scalar> PacketTraits;
- enum {
- packetSize = PacketTraits::size,
- alignable = PacketTraits::AlignedOnScalar,
- dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
- srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
- };
- const Index packetAlignedMask = packetSize - 1;
- const Index innerSize = dst.innerSize();
- const Index outerSize = dst.outerSize();
- const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
- Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
- : internal::first_aligned(&dst.coeffRef(0,0), innerSize);
-
- for(Index outer = 0; outer < outerSize; ++outer)
- {
- const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
- // do the non-vectorizable part of the assignment
- for(Index inner = 0; inner<alignedStart ; ++inner)
- dst.copyCoeffByOuterInner(outer, inner, src);
-
- // do the vectorizable part of the assignment
- for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
- dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
-
- // do the non-vectorizable part of the assignment
- for(Index inner = alignedEnd; inner<innerSize ; ++inner)
- dst.copyCoeffByOuterInner(outer, inner, src);
-
- alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
- }
- }
-};
-
-} // end namespace internal
-
-/***************************************************************************
-* Part 4 : implementation of DenseBase methods
-***************************************************************************/
-
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
@@ -506,91 +27,35 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-#ifdef EIGEN_TEST_EVALUATORS
-
-#ifdef EIGEN_DEBUG_ASSIGN
- internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug();
-#endif
- eigen_assert(rows() == other.rows() && cols() == other.cols());
- internal::call_dense_assignment_loop(derived(),other.derived());
-
-#else // EIGEN_TEST_EVALUATORS
-
-#ifdef EIGEN_DEBUG_ASSIGN
- internal::assign_traits<Derived, OtherDerived>::debug();
-#endif
eigen_assert(rows() == other.rows() && cols() == other.cols());
- internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
- : int(InvalidTraversal)>::run(derived(),other.derived());
+ internal::call_assignment_no_alias(derived(),other.derived());
-#endif // EIGEN_TEST_EVALUATORS
-
-#ifndef EIGEN_NO_DEBUG
- checkTransposeAliasing(other.derived());
-#endif
return derived();
}
-namespace internal {
-
-template<typename Derived, typename OtherDerived,
- bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0,
- bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
- | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
- // revert to || as soon as not needed anymore.
- (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
- && int(Derived::SizeAtCompileTime) != 1>
-struct assign_selector;
-
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,false,false> {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
- template<typename ActualDerived, typename ActualOtherDerived>
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
-};
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,true,false> {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
-};
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,false,true> {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
- template<typename ActualDerived, typename ActualOtherDerived>
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
-};
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,true,true> {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
-};
-
-} // end namespace internal
-
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
- return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
{
- return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
{
- return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
template<typename Derived>
@@ -598,7 +63,8 @@ template <typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
- return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
template<typename Derived>
@@ -606,7 +72,8 @@ template <typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
{
- return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
template<typename Derived>
@@ -614,7 +81,8 @@ template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
{
- return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+ other.derived().evalTo(derived());
+ return derived();
}
} // end namespace Eigen
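Every operator= above now funnels into internal::call_assignment, which centralizes what assign_selector used to spread over four specializations: aliasing handling, destination resizing, and automatic vector transposition. A call-site sketch of the resulting behavior (illustrative, assuming the usual convention that product expressions report AssumeAliasing):

    #include <Eigen/Core>
    int main() {
      Eigen::MatrixXd a(2,2), b = Eigen::MatrixXd::Random(2,2);
      a = b * b;           // call_assignment: the source assumes aliasing, so the
                           // product is evaluated into a temporary before the copy
      a.noalias() = b * b; // NoAlias overload: call_assignment_no_alias, no temporary
      a = b.transpose();   // transpose aliasing is still checked in debug builds
      return 0;
    }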
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5451a138f..4db10e697 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -24,37 +24,46 @@ namespace internal {
// copy_using_evaluator_traits is based on assign_traits
-template <typename Derived, typename OtherDerived>
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
+ typedef typename DstEvaluator::XprType Dst;
+
+ enum {
+ DstFlags = DstEvaluator::Flags,
+ SrcFlags = SrcEvaluator::Flags
+ };
+
public:
enum {
- DstIsAligned = Derived::Flags & AlignedBit,
- DstHasDirectAccess = Derived::Flags & DirectAccessBit,
- SrcIsAligned = OtherDerived::Flags & AlignedBit,
- JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
- SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
+ DstIsAligned = DstFlags & AlignedBit,
+ DstHasDirectAccess = DstFlags & DirectAccessBit,
+ SrcIsAligned = SrcFlags & AlignedBit,
+ JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
};
private:
enum {
- InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
- : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
- : int(Derived::RowsAtCompileTime),
- InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
- : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
- : int(Derived::MaxRowsAtCompileTime),
- MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
- PacketSize = packet_traits<typename Derived::Scalar>::size
+ InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+ : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+ : int(Dst::RowsAtCompileTime),
+ InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+ : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+ : int(Dst::MaxRowsAtCompileTime),
+ MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
+ PacketSize = packet_traits<typename Dst::Scalar>::size
};
enum {
- StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+ DstIsRowMajor = DstFlags&RowMajorBit,
+ SrcIsRowMajor = SrcFlags&RowMajorBit,
+ StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
MightVectorize = StorageOrdersAgree
- && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+ && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
+ && (functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
- MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+ MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
@@ -68,8 +77,7 @@ private:
public:
enum {
- Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal)
- : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(LinearTraversal)
@@ -82,12 +90,12 @@ public:
private:
enum {
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
- MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
- && int(OtherDerived::CoeffReadCost) != Dynamic
- && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+ MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
+ && int(SrcEvaluator::CoeffReadCost) != Dynamic
+ && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
- && int(OtherDerived::CoeffReadCost) != Dynamic
- && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+ && int(SrcEvaluator::CoeffReadCost) != Dynamic
+ && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
};
public:
@@ -110,6 +118,12 @@ public:
#ifdef EIGEN_DEBUG_ASSIGN
static void debug()
{
+ std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
+ std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
+ std::cerr.setf(std::ios::hex, std::ios::basefield);
+ EIGEN_DEBUG_VAR(DstFlags)
+ EIGEN_DEBUG_VAR(SrcFlags)
+ std::cerr.unsetf(std::ios::hex);
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
EIGEN_DEBUG_VAR(JointAlignment)
@@ -127,6 +141,7 @@ public:
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
EIGEN_DEBUG_VAR(Unrolling)
+ std::cerr << std::endl;
}
#endif
};
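Because the kernel constructor below now invokes AssignmentTraits::debug() itself, the enriched report (expression types plus the raw flag fields in hexadecimal) is emitted once per assignment. A hypothetical usage sketch:

    #define EIGEN_DEBUG_ASSIGN   // must precede any Eigen include
    #include <Eigen/Core>
    int main() {
      Eigen::Matrix4f a, b;
      b.setRandom();
      a = b + b;   // prints DstXpr/SrcXpr, DstFlags/SrcFlags, Traversal, Unrolling, ...
      return 0;
    }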
@@ -142,6 +157,7 @@ public:
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
+ // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -150,7 +166,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
inner = Index % DstXprType::InnerSizeAtCompileTime
};
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.assignCoeffByOuterInner(outer, inner);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
@@ -160,13 +176,13 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Kernel&) { }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
- static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer)
{
kernel.assignCoeffByOuterInner(outer, Index);
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer);
@@ -176,7 +192,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Kernel&, int) { }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index) { }
};
/***********************
@@ -186,7 +202,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
- static EIGEN_STRONG_INLINE void run(Kernel& kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
{
kernel.assignCoeff(Index);
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
@@ -196,7 +212,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Kernel&) { }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
/**************************
@@ -206,16 +222,17 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
+ // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
-
+
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
JointAlignment = Kernel::AssignmentTraits::JointAlignment
};
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
@@ -226,17 +243,16 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Kernel&) { }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
- static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer)
{
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index);
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+ enum { NextIndex = Index + packet_traits<typename Kernel::Scalar>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
}
};
@@ -244,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Kernel &, int) { }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, typename Kernel::Index) { }
};
/***************************************************************************
@@ -265,7 +281,7 @@ struct dense_assignment_loop;
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
- static void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
@@ -280,7 +296,7 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
@@ -291,7 +307,7 @@ template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
typedef typename Kernel::Index Index;
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -314,7 +330,7 @@ struct unaligned_dense_assignment_loop
{
// if IsAligned = true, then do nothing
template <typename Kernel>
- static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {}
};
template <>
@@ -323,14 +339,14 @@ struct unaligned_dense_assignment_loop<false>
 // MSVC must not inline this function. If it does, it fails to optimize the
// packet access path.
// FIXME check which version exhibits this issue
-#ifdef _MSC_VER
+#if EIGEN_COMP_MSVC
template <typename Kernel>
static EIGEN_DONT_INLINE void run(Kernel &kernel,
typename Kernel::Index start,
typename Kernel::Index end)
#else
template <typename Kernel>
- static EIGEN_STRONG_INLINE void run(Kernel &kernel,
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
typename Kernel::Index start,
typename Kernel::Index end)
#endif
@@ -343,7 +359,7 @@ struct unaligned_dense_assignment_loop<false>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
@@ -371,7 +387,7 @@ template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Kernel::Index Index;
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -391,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
- static inline void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
@@ -407,7 +423,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
@@ -418,7 +434,7 @@ template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename Kernel::Index Index;
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
const Index outerSize = kernel.outerSize();
@@ -434,7 +450,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
- static inline void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
const Index size = kernel.size();
@@ -446,7 +462,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
- static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
@@ -460,7 +476,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
- static inline void run(Kernel &kernel)
+ EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
{
typedef typename Kernel::Index Index;
typedef packet_traits<typename Kernel::Scalar> PacketTraits;
@@ -496,25 +512,8 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
}
};
-/****************************
-*** All-at-once traversal ***
-****************************/
-
-// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
-// Indeed, what to do with the kernel's functor??
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
-{
- static inline void run(Kernel & kernel)
- {
- // Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
- // TODO: Do not pass the xpr object to evalTo() (Jitse)
- kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression());
- }
-};
-
/***************************************************************************
-* Part 4 : Generic Assignment routine
+* Part 4 : Generic dense assignment kernel
***************************************************************************/
 // This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
@@ -523,7 +522,7 @@ struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
 // This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
 // One can customize the assignment using this generic dense_assignment_kernel with different
 // functors, or by completely overloading it, bypassing a functor.
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
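The comment above is the crux of the design: the loops only ever touch the destination through the functor's assignCoeff/assignPacket, so a new per-coefficient update rule needs no new loop code. A minimal functor sketch (hypothetical, modeled on internal::assign_op; note how PacketAccess feeds the new MightVectorize condition seen earlier):

    namespace Eigen { namespace internal {
    template<typename Scalar> struct max_assign_op {
      EIGEN_EMPTY_STRUCT_CTOR(max_assign_op)
      EIGEN_DEVICE_FUNC void assignCoeff(Scalar& a, const Scalar& b) const
      { if (a < b) a = b; }  // no assignPacket provided, hence:
    };
    template<typename Scalar> struct functor_traits<max_assign_op<Scalar> > {
      enum { Cost = NumTraits<Scalar>::AddCost,
             PacketAccess = false };  // keeps the vectorized paths disabled
    };
    }}

Calling call_assignment_no_alias(dst, src, max_assign_op<double>()) would then run the generic loops with this update rule.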
@@ -535,35 +534,44 @@ public:
typedef SrcEvaluatorTypeT SrcEvaluatorType;
typedef typename DstEvaluatorType::Scalar Scalar;
typedef typename DstEvaluatorType::Index Index;
- typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits;
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
- generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+ EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
- {}
+ {
+ #ifdef EIGEN_DEBUG_ASSIGN
+ AssignmentTraits::debug();
+ #endif
+ }
- Index size() const { return m_dstExpr.size(); }
- Index innerSize() const { return m_dstExpr.innerSize(); }
- Index outerSize() const { return m_dstExpr.outerSize(); }
- Index outerStride() const { return m_dstExpr.outerStride(); }
+ EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
+ EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
+ EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
+ EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
+ EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
// TODO get rid of this one:
- DstXprType& dstExpression() const { return m_dstExpr; }
+ EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; }
- DstEvaluatorType& dstEvaluator() { return m_dst; }
- const SrcEvaluatorType& srcEvaluator() const { return m_src; }
+ EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
+ EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
- void assignCoeff(Index row, Index col)
+ /// Assign src(row,col) to dst(row,col) through the assignment functor.
+ EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
{
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
}
- void assignCoeff(Index index)
+ /// \sa assignCoeff(Index,Index)
+ EIGEN_DEVICE_FUNC void assignCoeff(Index index)
{
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
}
- void assignCoeffByOuterInner(Index outer, Index inner)
+ /// \sa assignCoeff(Index,Index)
+ EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
@@ -572,40 +580,40 @@ public:
template<int StoreMode, int LoadMode>
- void assignPacket(Index row, Index col)
+ EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
}
template<int StoreMode, int LoadMode>
- void assignPacket(Index index)
+ EIGEN_DEVICE_FUNC void assignPacket(Index index)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
}
template<int StoreMode, int LoadMode>
- void assignPacketByOuterInner(Index outer, Index inner)
+ EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode>(row, col);
}
- static Index rowIndexByOuterInner(Index outer, Index inner)
+ EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
- : int(Traits::Flags)&RowMajorBit ? outer
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
: inner;
}
- static Index colIndexByOuterInner(Index outer, Index inner)
+ EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
- : int(Traits::Flags)&RowMajorBit ? inner
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
: outer;
}
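Concretely (an illustrative reading): for a column-major destination, outer indexes columns and inner indexes rows, so rowIndexByOuterInner(outer, inner) yields inner and colIndexByOuterInner(outer, inner) yields outer; for a row-major destination the roles swap. The only change here is where RowMajorBit is read from: the destination evaluator's Flags rather than the expression's traits.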
@@ -617,13 +625,13 @@ protected:
DstXprType& m_dstExpr;
};
+/***************************************************************************
+* Part 5 : Entry point for dense rectangular assignment
+***************************************************************************/
+
template<typename DstXprType, typename SrcXprType, typename Functor>
-void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
-#ifdef EIGEN_DEBUG_ASSIGN
- // TODO these traits should be computed from information provided by the evaluators
- internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
@@ -639,201 +647,147 @@ void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, co
}
template<typename DstXprType, typename SrcXprType>
-void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}
/***************************************************************************
-* Part 5 : Entry points
+* Part 6 : Generic assignment
***************************************************************************/
-// Based on DenseBase::LazyAssign()
-// The following functions are just for testing and they are meant to be moved to operator= and the likes.
-
-template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
- const EigenBase<SrcXprType>& src)
-{
- return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
-}
+// Based on the respective shapes of the destination and source,
+// the class AssignmentKind determines the kind of assignment mechanism.
+// AssignmentKind must define a Kind typedef.
+template<typename DstShape, typename SrcShape> struct AssignmentKind;
-template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
-struct AddEvalIfAssumingAliasing;
+// Assignment kinds defined in this file:
+struct Dense2Dense {};
+struct EigenBase2EigenBase {};
-template<typename XprType>
-struct AddEvalIfAssumingAliasing<XprType, 0>
-{
- static const XprType& run(const XprType& xpr)
- {
- return xpr;
- }
-};
+template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
+template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
+
+// This is the main assignment class
+template< typename DstXprType, typename SrcXprType, typename Functor,
+ typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
+ typename Scalar = typename DstXprType::Scalar>
+struct Assignment;
-template<typename XprType>
-struct AddEvalIfAssumingAliasing<XprType, 1>
-{
- static const EvalToTemp<XprType> run(const XprType& xpr)
- {
- return EvalToTemp<XprType>(xpr);
- }
-};
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
-{
- return noalias_copy_using_evaluator(dst.const_cast_derived(),
- AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
- func
- );
-}
+// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
+// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes things quite complicated.
+// So this intermediate function removes everything related to AssumeAliasing such that Assignment
+// does not have to bother with these annoying details.
-// this mimics operator=
-template<typename DstXprType, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
{
- return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
-
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
{
-#ifdef EIGEN_DEBUG_ASSIGN
- internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
-#ifdef EIGEN_NO_AUTOMATIC_RESIZING
- eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
- : (dst.rows() == src.rows() && dst.cols() == src.cols())))
- && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
-#else
- dst.const_cast_derived().resizeLike(src.derived());
-#endif
- call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
- return dst.derived();
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
-
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+
+// Deal with AssumeAliasing
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
{
- call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
- return dst.derived();
+ typename plain_matrix_type<Src>::type tmp(src);
+ call_assignment_no_alias(dst, tmp, func);
}
-// Based on DenseBase::swap()
-// TODO: Check whether we need to do something special for swapping two
-// Arrays or Matrices. (Jitse)
-
-// Overload default assignPacket behavior for swapping them
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
-class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> >
-{
- typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base;
- typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
- using Base::m_dst;
- using Base::m_src;
- using Base::m_functor;
-
-public:
- typedef typename Base::Scalar Scalar;
- typedef typename Base::Index Index;
- typedef typename Base::DstXprType DstXprType;
-
- swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr)
- : Base(dst, src, swap_assign_op<Scalar>(), dstExpr)
- {}
-
- template<int StoreMode, int LoadMode>
- void assignPacket(Index row, Index col)
- {
- m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
- }
-
- template<int StoreMode, int LoadMode>
- void assignPacket(Index index)
- {
- m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
- }
-
- // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
- template<int StoreMode, int LoadMode>
- void assignPacketByOuterInner(Index outer, Index inner)
- {
- Index row = Base::rowIndexByOuterInner(outer, inner);
- Index col = Base::colIndexByOuterInner(outer, inner);
- assignPacket<StoreMode,LoadMode>(row, col);
- }
-};
-
-template<typename DstXprType, typename SrcXprType>
-void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
{
- // TODO there is too much redundancy with call_dense_assignment_loop
-
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
- typedef typename evaluator<DstXprType>::type DstEvaluatorType;
- typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
- DstEvaluatorType dstEvaluator(dst);
- SrcEvaluatorType srcEvaluator(src);
-
- typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel;
- Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived());
-
- dense_assignment_loop<Kernel>::run(kernel);
+ call_assignment_no_alias(dst, src, func);
}
-// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
-template<typename DstXprType, typename SrcXprType>
-void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+// bypass AssumeAliasing
+// FIXME the const version should probably not be needed
+// When there is no aliasing, we require that 'dst' has been properly resized
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+EIGEN_DEVICE_FUNC void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
- typedef typename DstXprType::Scalar Scalar;
- copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
+ call_assignment_no_alias(dst.expression(), src, func);
}
-
-// Based on ArrayBase::operator+=
-template<typename DstXprType, typename SrcXprType>
-void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
- typedef typename DstXprType::Scalar Scalar;
- copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
+ call_assignment_no_alias(dst.expression(), src, func);
}
-// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse)
-template<typename DstXprType, typename SrcXprType>
-void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
- typedef typename DstXprType::Scalar Scalar;
- copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
-}
+ enum {
+ NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+ | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+ // revert to || as soon as not needed anymore.
+ (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1))
+ && int(Dst::SizeAtCompileTime) != 1
+ };
-template<typename DstXprType, typename SrcXprType>
-void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
-{
- typedef typename DstXprType::Scalar Scalar;
- copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
+ typename Dst::Index dstRows = NeedToTranspose ? src.cols() : src.rows();
+ typename Dst::Index dstCols = NeedToTranspose ? src.rows() : src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
+ typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
+ typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
+ ActualDstType actualDst(dst);
+
+ // TODO check whether this is the right place to perform these checks:
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
+
+ // TODO this line is commented out to allow matrix = permutation
+ // Actually, the "Scalar" type for a permutation matrix does not really make sense,
+ // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow mixing void with anything...?
+// EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+
+ Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
-
-template<typename DstXprType, typename SrcXprType>
-void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
{
- typedef typename DstXprType::Scalar Scalar;
- copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
+ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
}
-template<typename DstXprType, typename SrcXprType>
-void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+// forward declaration
+template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
+
+// Generic Dense to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
{
- typedef typename DstXprType::Scalar Scalar;
- copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
-}
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+#ifndef EIGEN_NO_DEBUG
+ internal::check_for_aliasing(dst, src);
+#endif
+
+ call_dense_assignment_loop(dst, src, func);
+ }
+};
+// Generic assignment through evalTo.
+// TODO: not sure we need to keep this one, but it helps port existing code to the new evaluator mechanism.
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
+{
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ src.evalTo(dst);
+ }
+};
} // namespace internal
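
For illustration, a minimal sketch (not part of this patch) of what the NeedToTranspose branch in call_assignment_no_alias enables at the user level: assigning a row vector to a column vector silently routes the assignment through Transpose<Dst>.

    #include <Eigen/Dense>

    int main()
    {
      Eigen::RowVector3f r(1.f, 2.f, 3.f);
      Eigen::Vector3f c;
      c = r;  // Dst is 3x1, Src is 1x3 => NeedToTranspose, so the assignment
              // effectively runs on Transpose<Vector3f>
      return c.sum() == 6.f ? 0 : 1;
    }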
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index ffd7fe8b3..e59ee3da9 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -204,7 +204,7 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub
typedef typename internal::traits<BandMatrix>::Index Index;
typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
- inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
+ explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
: m_coeffs(1+supers+subs,cols),
m_rows(rows), m_supers(supers), m_subs(subs)
{
@@ -266,7 +266,7 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
typedef typename internal::traits<BandMatrixWrapper>::Index Index;
- inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
+ explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
@@ -314,7 +314,7 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
typedef typename Base::Index Index;
public:
- TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
+ explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
inline typename Base::template DiagonalIntReturnType<1>::Type super()
{ return Base::template diagonal<1>(); }
@@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
protected:
};
+
+struct BandShape {};
+
+template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
+struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+ : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+ typedef BandShape Shape;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+ : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+ typedef BandShape Shape;
+};
+
+template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
+
} // end namespace internal
} // end namespace Eigen
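
A hedged sketch of the dispatching introduced above (BandMatrix is an internal class, shown for illustration only, and this assumes BandMatrixBase's evalTo): copying a band matrix into a dense matrix resolves AssignmentKind<DenseShape,BandShape> to EigenBase2EigenBase, i.e. to src.evalTo(dst).

    #include <Eigen/Core>

    int main()
    {
      Eigen::internal::BandMatrix<float> B(5, 5, 1, 2);  // 1 super-, 2 sub-diagonals
      B.coeffs().setZero();
      B.diagonal().setOnes();
      Eigen::MatrixXf D(5, 5);
      D = B;  // Assignment<..., EigenBase2EigenBase>::run -> B.evalTo(D)
      return 0;
    }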
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index da193d1a2..9cf9d5432 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -68,6 +68,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaxColsAtCompileTime = BlockCols==0 ? 0
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(traits<XprType>::MaxColsAtCompileTime),
+
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -80,18 +81,14 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
- MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
- && (InnerStrideAtCompileTime == 1)
- ? PacketAccessBit : 0,
- MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
- FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
+ // IsAligned is needed by MapBase's assertions
+  // We can safely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator.
+ IsAligned = 0,
+  // FIXME these traits are rather specialized for dense objects and need further cleanup
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
- Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
- DirectAccessBit |
- MaskPacketAccessBit |
- MaskAlignedBit),
- Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
+ Flags = (traits<XprType>::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit
+ // FIXME DirectAccessBit should not be handled by expressions
};
};
@@ -111,6 +108,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
typedef Impl Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
+
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
/** Column or Row constructor
*/
@@ -179,7 +178,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
- class InnerIterator;
+ // class InnerIterator; // FIXME apparently never used
/** Column or Row constructor
*/
@@ -333,6 +332,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
{
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ enum {
+ XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
+ };
public:
typedef MapBase<BlockType> Base;
@@ -343,9 +345,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index i)
- : Base(internal::const_cast_ptr(&xpr.coeffRef(
- (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
- (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
+ : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
+ || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
BlockRows==1 ? 1 : xpr.rows(),
BlockCols==1 ? 1 : xpr.cols()),
m_xpr(xpr)
@@ -357,7 +358,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
*/
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
- : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
+ : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
+ m_xpr(xpr)
{
init();
}
@@ -368,7 +370,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
inline BlockImpl_dense(XprType& xpr,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
- : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
+ : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
m_xpr(xpr)
{
init();
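
The constructors above replace coeffRef-based addressing with explicit stride arithmetic. A standalone sketch of the same offset computation (block_offset is a hypothetical helper, not Eigen code):

    #include <cstddef>

    // Offset of the (startRow, startCol) block from data(), in units of Scalar.
    std::ptrdiff_t block_offset(std::ptrdiff_t innerStride, std::ptrdiff_t outerStride,
                                std::ptrdiff_t startRow, std::ptrdiff_t startCol,
                                bool xprIsRowMajor)
    {
      return innerStride * (xprIsRowMajor ? startCol : startRow)
           + outerStride * (xprIsRowMajor ? startRow : startCol);
    }
    // e.g. for a column-major 4x4 matrix (innerStride=1, outerStride=4),
    // the block starting at (1,2) begins 1*1 + 4*2 = 9 scalars past data().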
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index be9f48a8c..dac1887e0 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -17,9 +17,10 @@ namespace internal {
template<typename Derived, int UnrollCount>
struct all_unroller
{
+ typedef typename Derived::ExpressionTraits Traits;
enum {
- col = (UnrollCount-1) / Derived::RowsAtCompileTime,
- row = (UnrollCount-1) % Derived::RowsAtCompileTime
+ col = (UnrollCount-1) / Traits::RowsAtCompileTime,
+ row = (UnrollCount-1) % Traits::RowsAtCompileTime
};
static inline bool run(const Derived &mat)
@@ -43,11 +44,12 @@ struct all_unroller<Derived, Dynamic>
template<typename Derived, int UnrollCount>
struct any_unroller
{
+ typedef typename Derived::ExpressionTraits Traits;
enum {
- col = (UnrollCount-1) / Derived::RowsAtCompileTime,
- row = (UnrollCount-1) % Derived::RowsAtCompileTime
+ col = (UnrollCount-1) / Traits::RowsAtCompileTime,
+ row = (UnrollCount-1) % Traits::RowsAtCompileTime
};
-
+
static inline bool run(const Derived &mat)
{
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
@@ -78,19 +80,21 @@ struct any_unroller<Derived, Dynamic>
template<typename Derived>
inline bool DenseBase<Derived>::all() const
{
+ typedef typename internal::evaluator<Derived>::type Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
- && CoeffReadCost != Dynamic
+ && Evaluator::CoeffReadCost != Dynamic
&& NumTraits<Scalar>::AddCost != Dynamic
- && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+ && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
+ Evaluator evaluator(derived());
if(unroll)
- return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+ return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
else
{
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
- if (!coeff(i, j)) return false;
+ if (!evaluator.coeff(i, j)) return false;
return true;
}
}
@@ -102,19 +106,21 @@ inline bool DenseBase<Derived>::all() const
template<typename Derived>
inline bool DenseBase<Derived>::any() const
{
+ typedef typename internal::evaluator<Derived>::type Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
- && CoeffReadCost != Dynamic
+ && Evaluator::CoeffReadCost != Dynamic
&& NumTraits<Scalar>::AddCost != Dynamic
- && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+ && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
+ Evaluator evaluator(derived());
if(unroll)
- return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+ return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
else
{
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
- if (coeff(i, j)) return true;
+ if (evaluator.coeff(i, j)) return true;
return false;
}
}
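
The observable behavior of all() and any() is unchanged; only the coefficient reads now go through an evaluator. A minimal usage sketch:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::Array3f a(1.f, 2.f, 3.f);
      std::cout << (a > 0.f).all() << "\n";   // 1: every coefficient is positive
      std::cout << (a > 2.5f).any() << "\n";  // 1: the last coefficient qualifies
      return 0;
    }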
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 3568cb85f..1c7123b85 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -14,57 +14,85 @@
#define EIGEN_COREEVALUATORS_H
namespace Eigen {
-
+
namespace internal {
-// evaluator_traits<T> contains traits for evaluator_impl<T>
+// This class returns the evaluator kind from the expression storage kind.
+// The default assumes index-based accessors.
+template<typename StorageKind>
+struct storage_kind_to_evaluator_kind {
+ typedef IndexBased Kind;
+};
-template<typename T>
-struct evaluator_traits
-{
- // 1 if evaluator_impl<T>::evalTo() exists
- // 0 if evaluator_impl<T> allows coefficient-based access
- static const int HasEvalTo = 0;
+// This class returns the evaluator shape from the expression storage kind.
+// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc.
+template<typename StorageKind> struct storage_kind_to_shape;
- // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
- // temporary; 0 if not.
- static const int AssumeAliasing = 0;
-};
-// expression class for evaluating nested expression to a temporary
-
-template<typename ArgType>
-class EvalToTemp;
+template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
-// evaluator<T>::type is type of evaluator for T
-// evaluator<T>::nestedType is type of evaluator if T is nested inside another evaluator
-
-template<typename T>
-struct evaluator_impl
-{ };
-
-template<typename T, int Nested = evaluator_traits<T>::HasEvalTo>
-struct evaluator_nested_type;
+
+// FIXME Is this necessary? And why was it not needed before the refactoring?
+template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
+
+
+// Evaluators have to be specialized with respect to various criteria such as:
+// - storage/structure/shape
+// - scalar type
+// - etc.
+// Therefore, we need evaluator specializations providing additional template arguments for each kind of evaluator.
+// We currently distinguish the following kinds of evaluators:
+// - unary_evaluator for expressions taking only one argument (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate)
+// - binary_evaluator for expressions taking two arguments (CwiseBinaryOp)
+// - product_evaluator for linear algebra products (Product); a special case of binary_evaluator because it requires additional tags for dispatching.
+// - mapbase_evaluator for Map, Block, Ref
+// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator)
+
+template< typename T,
+ typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
+ typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
+ typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+ typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;
+
+template< typename T,
+ typename Kind = typename evaluator_traits<typename T::NestedExpression>::Kind,
+ typename Scalar = typename T::Scalar> struct unary_evaluator;
+
+// evaluator_traits<T> contains traits for evaluator<T>
template<typename T>
-struct evaluator_nested_type<T, 0>
+struct evaluator_traits_base
{
- typedef evaluator_impl<T> type;
+  // TODO check whether these two indirections are really needed.
+  // Basically, if nobody overwrites type and nestedType, they can be dropped.
+// typedef evaluator<T> type;
+// typedef evaluator<T> nestedType;
+
+ // by default, get evaluator kind and shape from storage
+ typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
+ typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
+
+ // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
+ // temporary; 0 if not.
+ static const int AssumeAliasing = 0;
};
+// Default evaluator traits
template<typename T>
-struct evaluator_nested_type<T, 1>
+struct evaluator_traits : public evaluator_traits_base<T>
{
- typedef evaluator_impl<EvalToTemp<T> > type;
};
+
+// By default, we assume a unary expression:
template<typename T>
-struct evaluator
+struct evaluator : public unary_evaluator<T>
{
- typedef evaluator_impl<T> type;
- typedef typename evaluator_nested_type<T>::type nestedType;
+ typedef unary_evaluator<T> Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
};
+
// TODO: Think about const-correctness
template<typename T>
@@ -76,47 +104,59 @@ struct evaluator<const T>
// TODO this class does not seem to be necessary anymore
template<typename ExpressionType>
-struct evaluator_impl_base
+struct evaluator_base
{
- typedef typename ExpressionType::Index Index;
+// typedef typename evaluator_traits<ExpressionType>::type type;
+// typedef typename evaluator_traits<ExpressionType>::nestedType nestedType;
+ typedef evaluator<ExpressionType> type;
+ typedef evaluator<ExpressionType> nestedType;
+
+ typedef typename traits<ExpressionType>::Index Index;
   // TODO it is not very nice to have to propagate all these traits; they are currently only needed to handle outer/inner indices.
typedef traits<ExpressionType> ExpressionTraits;
-
- evaluator_impl<ExpressionType>& derived()
- {
- return *static_cast<evaluator_impl<ExpressionType>*>(this);
- }
};
// -------------------- Matrix and Array --------------------
//
-// evaluator_impl<PlainObjectBase> is a common base class for the
+// evaluator<PlainObjectBase> is a common base class for the
// Matrix and Array evaluators.
+// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense,
+// so no need for more sophisticated dispatching.
template<typename Derived>
-struct evaluator_impl<PlainObjectBase<Derived> >
- : evaluator_impl_base<Derived>
+struct evaluator<PlainObjectBase<Derived> >
+ : evaluator_base<Derived>
{
typedef PlainObjectBase<Derived> PlainObjectType;
+ typedef typename PlainObjectType::Index Index;
+ typedef typename PlainObjectType::Scalar Scalar;
+ typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
+ typedef typename PlainObjectType::PacketScalar PacketScalar;
+ typedef typename PlainObjectType::PacketReturnType PacketReturnType;
enum {
IsRowMajor = PlainObjectType::IsRowMajor,
IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
- ColsAtCompileTime = PlainObjectType::ColsAtCompileTime
+ ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
+
+ CoeffReadCost = NumTraits<Scalar>::ReadCost,
+ Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
+ Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
};
-
- evaluator_impl(const PlainObjectType& m)
+
+ EIGEN_DEVICE_FUNC evaluator()
+ : m_data(0),
+ m_outerStride(IsVectorAtCompileTime ? 0
+ : int(IsRowMajor) ? ColsAtCompileTime
+ : RowsAtCompileTime)
+ {}
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
: m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
{ }
- typedef typename PlainObjectType::Index Index;
- typedef typename PlainObjectType::Scalar Scalar;
- typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
- typedef typename PlainObjectType::PacketScalar PacketScalar;
- typedef typename PlainObjectType::PacketReturnType PacketReturnType;
-
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
if (IsRowMajor)
return m_data[row * m_outerStride.value() + col];
@@ -124,12 +164,12 @@ struct evaluator_impl<PlainObjectBase<Derived> >
return m_data[row + col * m_outerStride.value()];
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_data[index];
}
- Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
{
if (IsRowMajor)
return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
@@ -137,7 +177,7 @@ struct evaluator_impl<PlainObjectBase<Derived> >
return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
}
- Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
{
return const_cast<Scalar*>(m_data)[index];
}
@@ -184,153 +224,45 @@ protected:
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
- : evaluator_impl<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+ : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
{
typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+
+ evaluator() {}
- evaluator_impl(const XprType& m)
- : evaluator_impl<PlainObjectBase<XprType> >(m)
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+ : evaluator<PlainObjectBase<XprType> >(m)
{ }
};
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
- : evaluator_impl<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+ : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
{
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
- evaluator_impl(const XprType& m)
- : evaluator_impl<PlainObjectBase<XprType> >(m)
- { }
-};
-
-// -------------------- EvalToTemp --------------------
-
-template<typename ArgType>
-struct traits<EvalToTemp<ArgType> >
- : public traits<ArgType>
-{ };
-
-template<typename ArgType>
-class EvalToTemp
- : public dense_xpr_base<EvalToTemp<ArgType> >::type
-{
- public:
-
- typedef typename dense_xpr_base<EvalToTemp>::type Base;
- EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
-
- EvalToTemp(const ArgType& arg)
- : m_arg(arg)
- { }
-
- const ArgType& arg() const
- {
- return m_arg;
- }
-
- Index rows() const
- {
- return m_arg.rows();
- }
-
- Index cols() const
- {
- return m_arg.cols();
- }
-
- private:
- const ArgType& m_arg;
-};
-
-template<typename ArgType>
-struct evaluator_impl<EvalToTemp<ArgType> >
-{
- typedef EvalToTemp<ArgType> XprType;
- typedef typename ArgType::PlainObject PlainObject;
-
- evaluator_impl(const XprType& xpr)
- : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result)
- {
- // TODO we should simply do m_result(xpr.arg());
- call_dense_assignment_loop(m_result, xpr.arg());
- }
-
- // This constructor is used when nesting an EvalTo evaluator in another evaluator
- evaluator_impl(const ArgType& arg)
- : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result)
- {
- // TODO we should simply do m_result(xpr.arg());
- call_dense_assignment_loop(m_result, arg);
- }
-
- typedef typename PlainObject::Index Index;
- typedef typename PlainObject::Scalar Scalar;
- typedef typename PlainObject::CoeffReturnType CoeffReturnType;
- typedef typename PlainObject::PacketScalar PacketScalar;
- typedef typename PlainObject::PacketReturnType PacketReturnType;
-
- // All other functions are forwarded to m_resultImpl
-
- CoeffReturnType coeff(Index row, Index col) const
- {
- return m_resultImpl.coeff(row, col);
- }
-
- CoeffReturnType coeff(Index index) const
- {
- return m_resultImpl.coeff(index);
- }
+ evaluator() {}
- Scalar& coeffRef(Index row, Index col)
- {
- return m_resultImpl.coeffRef(row, col);
- }
-
- Scalar& coeffRef(Index index)
- {
- return m_resultImpl.coeffRef(index);
- }
-
- template<int LoadMode>
- PacketReturnType packet(Index row, Index col) const
- {
- return m_resultImpl.template packet<LoadMode>(row, col);
- }
-
- template<int LoadMode>
- PacketReturnType packet(Index index) const
- {
- return m_resultImpl.packet<LoadMode>(index);
- }
-
- template<int StoreMode>
- void writePacket(Index row, Index col, const PacketScalar& x)
- {
- m_resultImpl.template writePacket<StoreMode>(row, col, x);
- }
-
- template<int StoreMode>
- void writePacket(Index index, const PacketScalar& x)
- {
- m_resultImpl.template writePacket<StoreMode>(index, x);
- }
-
-protected:
- PlainObject m_result;
- typename evaluator<PlainObject>::nestedType m_resultImpl;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+ : evaluator<PlainObjectBase<XprType> >(m)
+ { }
};
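
With evaluator_impl folded into evaluator, internal code can construct and query evaluators directly, mirroring the new all()/any() implementations. A hedged sketch against the internal API:

    #include <Eigen/Dense>

    int main()
    {
      Eigen::Matrix2f m;
      m << 1.f, 2.f,
           3.f, 4.f;
      // internal API, for illustration only
      Eigen::internal::evaluator<Eigen::Matrix2f> eval(m);
      return eval.coeff(1, 0) == 3.f ? 0 : 1;  // column-major: element (1,0) is 3
    }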
// -------------------- Transpose --------------------
template<typename ArgType>
-struct evaluator_impl<Transpose<ArgType> >
- : evaluator_impl_base<Transpose<ArgType> >
+struct unary_evaluator<Transpose<ArgType>, IndexBased>
+ : evaluator_base<Transpose<ArgType> >
{
typedef Transpose<ArgType> XprType;
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = evaluator<ArgType>::Flags ^ RowMajorBit
+ };
- evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
@@ -338,22 +270,22 @@ struct evaluator_impl<Transpose<ArgType> >
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(col, row);
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(index);
}
- Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(col, row);
}
- typename XprType::Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(index);
}
@@ -387,13 +319,27 @@ protected:
};
// -------------------- CwiseNullaryOp --------------------
+// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.
+// Likewise, there is no need for more sophisticated dispatching here.
template<typename NullaryOp, typename PlainObjectType>
-struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+ : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
{
typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+ typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
+
+ enum {
+ CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
+
+ Flags = (evaluator<PlainObjectTypeCleaned>::Flags
+ & ( HereditaryBits
+ | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
+ | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
+            | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should not be needed anymore
+ };
- evaluator_impl(const XprType& n)
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
: m_functor(n.functor())
{ }
@@ -401,12 +347,12 @@ struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> >
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(row, col);
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_functor(index);
}
@@ -430,11 +376,20 @@ protected:
// -------------------- CwiseUnaryOp --------------------
template<typename UnaryOp, typename ArgType>
-struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> >
+struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
+ : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
{
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+
+ Flags = evaluator<ArgType>::Flags & (
+ HereditaryBits | LinearAccessBit | AlignedBit
+ | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
+ };
- evaluator_impl(const XprType& op)
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
: m_functor(op.functor()),
m_argImpl(op.nestedExpression())
{ }
@@ -443,12 +398,12 @@ struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> >
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(m_argImpl.coeff(row, col));
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_functor(m_argImpl.coeff(index));
}
@@ -472,12 +427,43 @@ protected:
// -------------------- CwiseBinaryOp --------------------
+// this is a binary expression
template<typename BinaryOp, typename Lhs, typename Rhs>
-struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+ : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+ typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
- evaluator_impl(const XprType& xpr)
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
+ : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+
+ LhsFlags = evaluator<Lhs>::Flags,
+ RhsFlags = evaluator<Rhs>::Flags,
+ SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
+ StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
+ Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+ HereditaryBits
+ | (int(LhsFlags) & int(RhsFlags) &
+ ( AlignedBit
+ | (StorageOrdersAgree ? LinearAccessBit : 0)
+ | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+ )
+ )
+ ),
+ Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
+ };
+
+ EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
@@ -487,12 +473,12 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
}
@@ -501,14 +487,14 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
PacketScalar packet(Index row, Index col) const
{
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col),
- m_rhsImpl.template packet<LoadMode>(row, col));
+ m_rhsImpl.template packet<LoadMode>(row, col));
}
template<int LoadMode>
PacketScalar packet(Index index) const
{
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index),
- m_rhsImpl.template packet<LoadMode>(index));
+ m_rhsImpl.template packet<LoadMode>(index));
}
protected:
@@ -520,12 +506,18 @@ protected:
// -------------------- CwiseUnaryView --------------------
template<typename UnaryOp, typename ArgType>
-struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> >
- : evaluator_impl_base<CwiseUnaryView<UnaryOp, ArgType> >
+struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
+ : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
{
typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+
+ Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
+ };
- evaluator_impl(const XprType& op)
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
: m_unaryOp(op.functor()),
m_argImpl(op.nestedExpression())
{ }
@@ -534,22 +526,22 @@ struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> >
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_unaryOp(m_argImpl.coeff(row, col));
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_unaryOp(m_argImpl.coeff(index));
}
- Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
{
return m_unaryOp(m_argImpl.coeffRef(row, col));
}
- Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
{
return m_unaryOp(m_argImpl.coeffRef(index));
}
@@ -561,13 +553,15 @@ protected:
// -------------------- Map --------------------
-template<typename Derived, int AccessorsType>
-struct evaluator_impl<MapBase<Derived, AccessorsType> >
- : evaluator_impl_base<Derived>
-{
- typedef MapBase<Derived, AccessorsType> MapType;
- typedef Derived XprType;
+// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject?
+// However, that might complicate template specialization.
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator;
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator : evaluator_base<Derived>
+{
+ typedef Derived XprType;
typedef typename XprType::PointerType PointerType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
@@ -575,81 +569,121 @@ struct evaluator_impl<MapBase<Derived, AccessorsType> >
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
- evaluator_impl(const XprType& map)
- : m_data(const_cast<PointerType>(map.data())),
- m_rowStride(map.rowStride()),
- m_colStride(map.colStride())
- { }
-
enum {
- RowsAtCompileTime = XprType::RowsAtCompileTime
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
+ ColsAtCompileTime = XprType::ColsAtCompileTime,
+ CoeffReadCost = NumTraits<Scalar>::ReadCost
};
+
+ EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
+ : m_data(const_cast<PointerType>(map.data())),
+ m_xpr(map)
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+ }
- CoeffReturnType coeff(Index row, Index col) const
- {
- return m_data[col * m_colStride + row * m_rowStride];
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
- CoeffReturnType coeff(Index index) const
- {
- return coeff(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ {
+ return m_data[index * m_xpr.innerStride()];
}
- Scalar& coeffRef(Index row, Index col)
- {
- return m_data[col * m_colStride + row * m_rowStride];
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ {
+ return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
- Scalar& coeffRef(Index index)
- {
- return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ {
+ return m_data[index * m_xpr.innerStride()];
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
- {
- PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+ {
+ PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::ploadt<PacketScalar, LoadMode>(ptr);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
- {
- return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ {
+ return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
- {
- PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+ {
+ PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
- {
- return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0,
- x);
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
}
protected:
PointerType m_data;
- int m_rowStride;
- int m_colStride;
+ const XprType& m_xpr;
};
template<typename PlainObjectType, int MapOptions, typename StrideType>
-struct evaluator_impl<Map<PlainObjectType, MapOptions, StrideType> >
- : public evaluator_impl<MapBase<Map<PlainObjectType, MapOptions, StrideType> > >
+struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
+ : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
{
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
+ typedef typename XprType::Scalar Scalar;
+
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ HasNoInnerStride = InnerStrideAtCompileTime == 1,
+ HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+ HasNoStride = HasNoInnerStride && HasNoOuterStride,
+ IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+ IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+ KeepsPacketAccess = bool(HasNoInnerStride)
+ && ( bool(IsDynamicSize)
+ || HasNoOuterStride
+ || ( OuterStrideAtCompileTime!=Dynamic
+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
+ Flags0 = evaluator<PlainObjectType>::Flags,
+ Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
+ Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
+ ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
+ Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
+ };
- evaluator_impl(const XprType& map)
- : evaluator_impl<MapBase<XprType> >(map)
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
+ : mapbase_evaluator<XprType, PlainObjectType>(map)
+ { }
+};
+
+// -------------------- Ref --------------------
+
+template<typename PlainObjectType, int RefOptions, typename StrideType>
+struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
+ : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
+{
+ typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
+
+ enum {
+ Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
+ };
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
+ : mapbase_evaluator<XprType, PlainObjectType>(ref)
{ }
};
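
A usage sketch for the Map evaluator above: a non-unit inner stride clears KeepsPacketAccess (and hence PacketAccessBit), but coefficient access is unaffected.

    #include <Eigen/Dense>

    int main()
    {
      float data[8] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
      // view every other element: 0, 2, 4, 6
      Eigen::Map<Eigen::Vector4f, 0, Eigen::InnerStride<2> > v(data);
      return v.sum() == 12.f ? 0 : 1;
    }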
@@ -659,21 +693,68 @@ template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
-struct evaluator_impl<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ typedef typename XprType::Scalar Scalar;
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+
+ RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
+
+ ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
+ IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+ : ArgTypeIsRowMajor,
+ HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),
+ InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = HasSameStorageOrderAsArgType
+ ? int(inner_stride_at_compile_time<ArgType>::ret)
+ : int(outer_stride_at_compile_time<ArgType>::ret),
+ OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
+ ? int(outer_stride_at_compile_time<ArgType>::ret)
+ : int(inner_stride_at_compile_time<ArgType>::ret),
+ MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
+ && (InnerStrideAtCompileTime == 1)
+ ? PacketAccessBit : 0,
+
+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
+ FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
+ FlagsRowMajorBit = XprType::Flags&RowMajorBit,
+ Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+ DirectAccessBit |
+ MaskPacketAccessBit |
+ MaskAlignedBit),
+ Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
+ };
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
- evaluator_impl(const XprType& block) : block_evaluator_type(block) {}
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
};
+// no direct-access => dispatch to a unary evaluator
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>
- : evaluator_impl_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+ : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+
+ EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
+ : unary_evaluator<XprType>(block)
+ {}
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
+ : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
- block_evaluator(const XprType& block)
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
: m_argImpl(block.nestedExpression()),
m_startRow(block.startRow()),
m_startCol(block.startCol())
@@ -689,26 +770,24 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
RowsAtCompileTime = XprType::RowsAtCompileTime
};
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
- return coeff(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
- Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
}
- Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
{
- return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
template<int LoadMode>
@@ -721,7 +800,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
PacketReturnType packet(Index index) const
{
return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ RowsAtCompileTime == 1 ? index : 0);
}
template<int StoreMode>
@@ -734,8 +813,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
void writePacket(Index index, const PacketScalar& x)
{
return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0,
- x);
+ RowsAtCompileTime == 1 ? index : 0,
+ x);
}
protected:
@@ -749,24 +828,38 @@ protected:
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
- : evaluator_impl<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > >
+ : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
+ typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
- block_evaluator(const XprType& block)
- : evaluator_impl<MapBase<XprType> >(block)
- { }
+ EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
+ : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
+ {
+ // FIXME this should be an internal assertion
+ eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
+ }
};
// -------------------- Select --------------------
+// TODO shall we introduce a ternary_evaluator?
+// TODO enable vectorization for Select
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
-struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
{
typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
+ enum {
+ CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+ + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
+ evaluator<ElseMatrixType>::CoeffReadCost),
+
+ Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
+ };
- evaluator_impl(const XprType& select)
+ inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
: m_conditionImpl(select.conditionMatrix()),
m_thenImpl(select.thenMatrix()),
m_elseImpl(select.elseMatrix())
@@ -775,7 +868,7 @@ struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
- CoeffReturnType coeff(Index row, Index col) const
+ inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
if (m_conditionImpl.coeff(row, col))
return m_thenImpl.coeff(row, col);
@@ -783,7 +876,7 @@ struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType
return m_elseImpl.coeff(row, col);
}
- CoeffReturnType coeff(Index index) const
+ inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
if (m_conditionImpl.coeff(index))
return m_thenImpl.coeff(index);
@@ -801,21 +894,33 @@ protected:
// -------------------- Replicate --------------------
template<typename ArgType, int RowFactor, int ColFactor>
-struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> >
+struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
+ : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
{
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
-
- evaluator_impl(const XprType& replicate)
- : m_argImpl(replicate.nestedExpression()),
- m_rows(replicate.nestedExpression().rows()),
- m_cols(replicate.nestedExpression().cols())
- { }
-
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketReturnType PacketReturnType;
+ enum {
+ Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
+ };
+ typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
+ typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+
+ enum {
+ CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+
+ Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
+ };
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
+ : m_arg(replicate.nestedExpression()),
+ m_argImpl(m_arg),
+ m_rows(replicate.nestedExpression().rows()),
+ m_cols(replicate.nestedExpression().cols())
+ {}
+
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
// try to avoid using modulo; this is a pure optimization strategy
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@@ -842,9 +947,10 @@ struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> >
}
protected:
- typename evaluator<ArgType>::nestedType m_argImpl;
- const variable_if_dynamic<Index, XprType::RowsAtCompileTime> m_rows;
- const variable_if_dynamic<Index, XprType::ColsAtCompileTime> m_cols;
+    const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator? (we have the same situation in evaluator_product)
+ typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl;
+ const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
+ const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
};
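
Usage sketch for the Replicate evaluator above; coeff() maps the tiled index back into the nested expression, avoiding the modulo when compile-time sizes allow.

    #include <Eigen/Dense>

    int main()
    {
      Eigen::RowVector2f r(1.f, 2.f);
      Eigen::MatrixXf tiled = r.replicate(3, 2);  // 3x4, each row is [1 2 1 2]
      return (tiled.rows() == 3 && tiled.cols() == 4) ? 0 : 1;
    }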
@@ -855,23 +961,35 @@ protected:
// the row() and col() member functions.
template< typename ArgType, typename MemberOp, int Direction>
-struct evaluator_impl<PartialReduxExpr<ArgType, MemberOp, Direction> >
+struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
+ : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
{
typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+ typedef typename XprType::Scalar InputScalar;
+ enum {
+    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
+ };
+ typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+ enum {
+ CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+ : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
+ };
- evaluator_impl(const XprType expr)
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& expr)
: m_expr(expr)
- { }
+ {}
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_expr.coeff(row, col);
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_expr.coeff(index);
}
@@ -883,16 +1001,20 @@ protected:
// -------------------- MatrixWrapper and ArrayWrapper --------------------
//
-// evaluator_impl_wrapper_base<T> is a common base class for the
+// evaluator_wrapper_base<T> is a common base class for the
// MatrixWrapper and ArrayWrapper evaluators.
template<typename XprType>
-struct evaluator_impl_wrapper_base
- : evaluator_impl_base<XprType>
+struct evaluator_wrapper_base
+ : evaluator_base<XprType>
{
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = evaluator<ArgType>::Flags
+ };
- evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
+ EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
typedef typename ArgType::Index Index;
typedef typename ArgType::Scalar Scalar;
@@ -900,22 +1022,22 @@ struct evaluator_impl_wrapper_base
typedef typename ArgType::PacketScalar PacketScalar;
typedef typename ArgType::PacketReturnType PacketReturnType;
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(row, col);
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(index);
}
- Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(row, col);
}
- Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(index);
}
@@ -949,24 +1071,24 @@ protected:
};
template<typename TArgType>
-struct evaluator_impl<MatrixWrapper<TArgType> >
- : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >
+struct unary_evaluator<MatrixWrapper<TArgType> >
+ : evaluator_wrapper_base<MatrixWrapper<TArgType> >
{
typedef MatrixWrapper<TArgType> XprType;
- evaluator_impl(const XprType& wrapper)
- : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+ : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
{ }
};
template<typename TArgType>
-struct evaluator_impl<ArrayWrapper<TArgType> >
- : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >
+struct unary_evaluator<ArrayWrapper<TArgType> >
+ : evaluator_wrapper_base<ArrayWrapper<TArgType> >
{
typedef ArrayWrapper<TArgType> XprType;
- evaluator_impl(const XprType& wrapper)
- : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+ : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
{ }
};
@@ -977,8 +1099,8 @@ struct evaluator_impl<ArrayWrapper<TArgType> >
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond;
template<typename ArgType, int Direction>
-struct evaluator_impl<Reverse<ArgType, Direction> >
- : evaluator_impl_base<Reverse<ArgType, Direction> >
+struct unary_evaluator<Reverse<ArgType, Direction> >
+ : evaluator_base<Reverse<ArgType, Direction> >
{
typedef Reverse<ArgType, Direction> XprType;
typedef typename XprType::Index Index;
@@ -997,34 +1119,44 @@ struct evaluator_impl<Reverse<ArgType, Direction> >
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
ReversePacket = (Direction == BothDirections)
|| ((Direction == Vertical) && IsColMajor)
- || ((Direction == Horizontal) && IsRowMajor)
+ || ((Direction == Horizontal) && IsRowMajor),
+
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+
+ // let's enable LinearAccess only with vectorization because of the product overhead
+ // FIXME enable DirectAccess with negative strides?
+ Flags0 = evaluator<ArgType>::Flags,
+ LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+ ? LinearAccessBit : 0,
+
+ Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
};
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
- evaluator_impl(const XprType& reverse)
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
: m_argImpl(reverse.nestedExpression()),
m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0),
m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0)
{ }
- CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
- ReverseCol ? m_cols.value() - col - 1 : col);
+ ReverseCol ? m_cols.value() - col - 1 : col);
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
}
- Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
- ReverseCol ? m_cols.value() - col - 1 : col);
+ ReverseCol ? m_cols.value() - col - 1 : col);
}
- Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
}
@@ -1071,36 +1203,44 @@ protected:
// -------------------- Diagonal --------------------
template<typename ArgType, int DiagIndex>
-struct evaluator_impl<Diagonal<ArgType, DiagIndex> >
- : evaluator_impl_base<Diagonal<ArgType, DiagIndex> >
+struct evaluator<Diagonal<ArgType, DiagIndex> >
+ : evaluator_base<Diagonal<ArgType, DiagIndex> >
{
typedef Diagonal<ArgType, DiagIndex> XprType;
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+
+ Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
+ };
- evaluator_impl(const XprType& diagonal)
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
: m_argImpl(diagonal.nestedExpression()),
m_index(diagonal.index())
{ }
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
+ // FIXME having to check whether ArgType is sparse here is not very nice.
+ typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
+ typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
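+ // (when ArgType is sparse, coeff() must return the Scalar by value, because a
+ //  sparse expression has no addressable coefficient to return a reference to)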
- CoeffReturnType coeff(Index row, Index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const
{
return m_argImpl.coeff(row + rowOffset(), row + colOffset());
}
- CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
return m_argImpl.coeff(index + rowOffset(), index + colOffset());
}
- Scalar& coeffRef(Index row, Index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index)
{
return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
}
- Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
{
return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
}
@@ -1110,8 +1250,88 @@ protected:
const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
private:
- EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
- EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
+};
+
+
+//----------------------------------------------------------------------
+// deprecated code
+//----------------------------------------------------------------------
+
+// -------------------- EvalToTemp --------------------
+
+// expression class for evaluating a nested expression into a temporary
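+// (roughly: the evaluator of EvalToTemp<Xpr> copies Xpr into a plain temporary
+// in its constructor, and from then on behaves like the evaluator of that temporary)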
+
+template<typename ArgType> class EvalToTemp;
+
+template<typename ArgType>
+struct traits<EvalToTemp<ArgType> >
+ : public traits<ArgType>
+{ };
+
+template<typename ArgType>
+class EvalToTemp
+ : public dense_xpr_base<EvalToTemp<ArgType> >::type
+{
+ public:
+
+ typedef typename dense_xpr_base<EvalToTemp>::type Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
+
+ explicit EvalToTemp(const ArgType& arg)
+ : m_arg(arg)
+ { }
+
+ const ArgType& arg() const
+ {
+ return m_arg;
+ }
+
+ Index rows() const
+ {
+ return m_arg.rows();
+ }
+
+ Index cols() const
+ {
+ return m_arg.cols();
+ }
+
+ private:
+ const ArgType& m_arg;
+};
+
+template<typename ArgType>
+struct evaluator<EvalToTemp<ArgType> >
+ : public evaluator<typename ArgType::PlainObject>::type
+{
+ typedef EvalToTemp<ArgType> XprType;
+ typedef typename ArgType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ // TODO we should simply do m_result(xpr.arg());
+ call_dense_assignment_loop(m_result, xpr.arg());
+ }
+
+ // This constructor is used when nesting an EvalToTemp evaluator inside another evaluator
+ EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
+ : m_result(arg.rows(), arg.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ // TODO we should simply do m_result(arg);
+ call_dense_assignment_loop(m_result, arg);
+ }
+
+protected:
+ PlainObject m_result;
};
} // namespace internal
diff --git a/Eigen/src/Core/CoreIterators.h b/Eigen/src/Core/CoreIterators.h
index 6da4683d2..7feebc4e4 100644
--- a/Eigen/src/Core/CoreIterators.h
+++ b/Eigen/src/Core/CoreIterators.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -15,47 +15,116 @@ namespace Eigen {
/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core
*/
-/** \ingroup SparseCore_Module
- * \class InnerIterator
- * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression
- *
- * todo
+namespace internal {
+
+template<typename XprType, typename EvaluatorKind>
+class inner_iterator_selector;
+
+}
+
+/** \class InnerIterator
+ * \brief An InnerIterator allows looping over the elements of any matrix expression.
+ *
+ * \warning To be used with care, because an evaluator is constructed every time an InnerIterator is constructed.
+ *
+ * A minimal usage sketch (it assumes \c A is a dense \c Eigen::MatrixXd that has already been filled in):
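+ * \code
+ * for(Eigen::InnerIterator<Eigen::MatrixXd> it(A, 0); it; ++it)  // iterate over the first column of A
+ *   std::cout << "(" << it.row() << "," << it.col() << ") = " << it.value() << "\n";
+ * \endcode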
*/
+template<typename XprType>
+class InnerIterator
+{
+protected:
+ typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
+ typedef typename internal::evaluator<XprType>::type EvaluatorType;
+ typedef typename internal::traits<XprType>::Scalar Scalar;
+ typedef typename internal::traits<XprType>::Index Index;
+public:
+ /** Construct an iterator over the \a outerId -th row or column of \a xpr */
+ InnerIterator(const XprType &xpr, const Index &outerId)
+ : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize())
+ {}
+
+ /// \returns the value of the current coefficient.
+ EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); }
+ /** Increment the iterator \c *this to the next non-zero coefficient.
+ * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView
+ */
+ EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; }
+ /// \returns the column or row index of the current coefficient.
+ EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); }
+ /// \returns the row index of the current coefficient.
+ EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); }
+ /// \returns the column index of the current coefficient.
+ EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); }
+ /// \returns \c true if the iterator \c *this still references a valid coefficient.
+ EIGEN_STRONG_INLINE operator bool() const { return m_iter; }
+
+protected:
+ EvaluatorType m_eval;
+ IteratorType m_iter;
+private:
+ // If you get here, then you're not using the right InnerIterator type, e.g.:
+ // SparseMatrix<double,RowMajor> A;
+ // SparseMatrix<double>::InnerIterator it(A,0);
+ template<typename T> InnerIterator(const EigenBase<T>&,Index outer);
+};
+
+namespace internal {
-// generic version for dense matrix and expressions
-template<typename Derived> class DenseBase<Derived>::InnerIterator
+// Generic inner iterator implementation for dense objects
+template<typename XprType>
+class inner_iterator_selector<XprType, IndexBased>
{
- protected:
- typedef typename Derived::Scalar Scalar;
- typedef typename Derived::Index Index;
-
- enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit };
- public:
- EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer)
- : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize())
- {}
-
- EIGEN_STRONG_INLINE Scalar value() const
- {
- return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner)
- : m_expression.coeff(m_inner, m_outer);
- }
-
- EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; }
-
- EIGEN_STRONG_INLINE Index index() const { return m_inner; }
- inline Index row() const { return IsRowMajor ? m_outer : index(); }
- inline Index col() const { return IsRowMajor ? index() : m_outer; }
-
- EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
-
- protected:
- const Derived& m_expression;
- Index m_inner;
- const Index m_outer;
- const Index m_end;
+protected:
+ typedef typename evaluator<XprType>::type EvaluatorType;
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename traits<XprType>::Index Index;
+ enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
+
+public:
+ EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize)
+ : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize)
+ {}
+
+ EIGEN_STRONG_INLINE Scalar value() const
+ {
+ return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner)
+ : m_eval.coeff(m_inner, m_outer);
+ }
+
+ EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; }
+
+ EIGEN_STRONG_INLINE Index index() const { return m_inner; }
+ inline Index row() const { return IsRowMajor ? m_outer : index(); }
+ inline Index col() const { return IsRowMajor ? index() : m_outer; }
+
+ EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
+
+protected:
+ const EvaluatorType& m_eval;
+ Index m_inner;
+ const Index m_outer;
+ const Index m_end;
};
+// For iterator-based evaluators, the inner iterator is already implemented as
+// evaluator<>::InnerIterator.
+template<typename XprType>
+class inner_iterator_selector<XprType, IteratorBased>
+ : public evaluator<XprType>::InnerIterator
+{
+protected:
+ typedef typename evaluator<XprType>::InnerIterator Base;
+ typedef typename evaluator<XprType>::type EvaluatorType;
+ typedef typename traits<XprType>::Index Index;
+
+public:
+ EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)
+ : Base(eval, outerId)
+ {}
+};
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_COREITERATORS_H
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index e20daacc8..a205c3f10 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -56,8 +56,9 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typename Rhs::Scalar
)
>::type Scalar;
- typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
- typename traits<Rhs>::StorageKind>::ret StorageKind;
+ typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
+ typename traits<Rhs>::StorageKind,
+ BinaryOp>::ret StorageKind;
typedef typename promote_index_type<typename traits<Lhs>::Index,
typename traits<Rhs>::Index>::type Index;
typedef typename Lhs::Nested LhsNested;
@@ -65,60 +66,37 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
enum {
- LhsCoeffReadCost = _LhsNested::CoeffReadCost,
- RhsCoeffReadCost = _RhsNested::CoeffReadCost,
- LhsFlags = _LhsNested::Flags,
- RhsFlags = _RhsNested::Flags,
- SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
- StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
- Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
- HereditaryBits
- | (int(LhsFlags) & int(RhsFlags) &
- ( AlignedBit
- | (StorageOrdersAgree ? LinearAccessBit : 0)
- | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
- )
- )
- ),
- Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
- CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
+ Flags = _LhsNested::Flags & RowMajorBit
};
};
} // end namespace internal
-// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
-// that would take two operands of different types. If there were such an example, then this check should be
-// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
-// currently they take only one typename Scalar template parameter.
-// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
-// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
-// add together a float matrix and a double matrix.
-#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
- EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
- ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
- : int(internal::is_same<LHS, RHS>::value)), \
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
class CwiseBinaryOpImpl;
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOp : internal::no_assignment_operator,
+template<typename BinaryOp, typename LhsType, typename RhsType>
+class CwiseBinaryOp :
public CwiseBinaryOpImpl<
- BinaryOp, Lhs, Rhs,
- typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
- typename internal::traits<Rhs>::StorageKind>::ret>
+ BinaryOp, LhsType, RhsType,
+ typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+ typename internal::traits<RhsType>::StorageKind,
+ BinaryOp>::ret>,
+ internal::no_assignment_operator
{
public:
+
+ typedef typename internal::remove_all<LhsType>::type Lhs;
+ typedef typename internal::remove_all<RhsType>::type Rhs;
typedef typename CwiseBinaryOpImpl<
- BinaryOp, Lhs, Rhs,
- typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
- typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
+ BinaryOp, LhsType, RhsType,
+ typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind,
+ BinaryOp>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
- typedef typename internal::nested<Lhs>::type LhsNested;
- typedef typename internal::nested<Rhs>::type RhsNested;
+ typedef typename internal::nested<LhsType>::type LhsNested;
+ typedef typename internal::nested<RhsType>::type RhsNested;
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
@@ -165,43 +143,13 @@ class CwiseBinaryOp : internal::no_assignment_operator,
const BinaryOp m_functor;
};
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
- : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+// Generic API dispatcher
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl
+ : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
{
- typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
- public:
-
- typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
- EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
- {
- return derived().functor()(derived().lhs().coeff(rowId, colId),
- derived().rhs().coeff(rowId, colId));
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
- {
- return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(rowId, colId),
- derived().rhs().template packet<LoadMode>(rowId, colId));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
- {
- return derived().functor()(derived().lhs().coeff(index),
- derived().rhs().coeff(index));
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
- {
- return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(index),
- derived().rhs().template packet<LoadMode>(index));
- }
+public:
+ typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
};
/** replaces \c *this by \c *this - \a other.
@@ -213,8 +161,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{
- SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
- tmp = other.derived();
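+  // route the subtraction through the evaluator-based call_assignment path,
+  // which replaces the old SelfCwiseBinaryOp wrapper mechanism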
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
return derived();
}
@@ -227,8 +174,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{
- SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
- tmp = other.derived();
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
return derived();
}
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 124383114..05c4fedd0 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -35,19 +35,13 @@ template<typename NullaryOp, typename PlainObjectType>
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
{
enum {
- Flags = (traits<PlainObjectType>::Flags
- & ( HereditaryBits
- | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
- | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
- | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
- CoeffReadCost = functor_traits<NullaryOp>::Cost
+ Flags = traits<PlainObjectType>::Flags & RowMajorBit
};
};
}
template<typename NullaryOp, typename PlainObjectType>
-class CwiseNullaryOp : internal::no_assignment_operator,
- public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
+class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
{
public:
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index aa7df197f..da1d1992d 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -44,10 +44,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum {
- Flags = _XprTypeNested::Flags & (
- HereditaryBits | LinearAccessBit | AlignedBit
- | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
- CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
+ Flags = _XprTypeNested::Flags & RowMajorBit
};
};
}
@@ -56,16 +53,16 @@ template<typename UnaryOp, typename XprType, typename StorageKind>
class CwiseUnaryOpImpl;
template<typename UnaryOp, typename XprType>
-class CwiseUnaryOp : internal::no_assignment_operator,
- public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
+class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
{
public:
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
EIGEN_DEVICE_FUNC
- inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+ explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
: m_xpr(xpr), m_functor(func) {}
EIGEN_DEVICE_FUNC
@@ -92,42 +89,13 @@ class CwiseUnaryOp : internal::no_assignment_operator,
const UnaryOp m_functor;
};
-// This is the generic implementation for dense storage.
-// It can be used for any expression types implementing the dense concept.
-template<typename UnaryOp, typename XprType>
-class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
- : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+// Generic API dispatcher
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl
+ : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
{
- public:
-
- typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
- typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
- {
- return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
- {
- return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
- {
- return derived().functor()(derived().nestedExpression().coeff(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
- {
- return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
- }
+public:
+ typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
};
} // end namespace Eigen
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index b2638d326..6680f32dd 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -37,8 +37,8 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum {
- Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
- CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise:
// "error: no integral type can represent all of the enumerator values
@@ -62,8 +62,9 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
- inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
+ explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
: m_matrix(mat), m_functor(func) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
@@ -88,6 +89,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
ViewOp m_functor;
};
+// Generic API dispatcher
+template<typename ViewOp, typename XprType, typename StorageKind>
+class CwiseUnaryViewImpl
+ : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
+};
+
template<typename ViewOp, typename MatrixType>
class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
: public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
@@ -100,38 +110,18 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
- inline Scalar* data() { return &coeffRef(0); }
- inline const Scalar* data() const { return &coeff(0); }
+ EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
- inline Index innerStride() const
+ EIGEN_DEVICE_FUNC inline Index innerStride() const
{
return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
- inline Index outerStride() const
+ EIGEN_DEVICE_FUNC inline Index outerStride() const
{
return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
-
- EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
- {
- return derived().functor()(derived().nestedExpression().coeff(row, col));
- }
-
- EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return derived().functor()(derived().nestedExpression().coeff(index));
- }
-
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
- {
- return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col));
- }
-
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
- {
- return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index));
- }
};
} // end namespace Eigen
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index bd5dd14ed..e81b58481 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -50,7 +50,11 @@ template<typename Derived> class DenseBase
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
- class InnerIterator;
+
+ /** Inner iterator type to iterate over the coefficients of a row or column.
+ * \sa class InnerIterator
+ */
+ typedef Eigen::InnerIterator<Derived> InnerIterator;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
@@ -74,16 +78,6 @@ template<typename Derived> class DenseBase
using Base::colIndexByOuterInner;
using Base::coeff;
using Base::coeffByOuterInner;
- using Base::packet;
- using Base::packetByOuterInner;
- using Base::writePacket;
- using Base::writePacketByOuterInner;
- using Base::coeffRef;
- using Base::coeffRefByOuterInner;
- using Base::copyCoeff;
- using Base::copyCoeffByOuterInner;
- using Base::copyPacket;
- using Base::copyPacketByOuterInner;
using Base::operator();
using Base::operator[];
using Base::x;
@@ -169,16 +163,11 @@ template<typename Derived> class DenseBase
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
: int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
- CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
- /**< This is a rough measure of how expensive it is to read one coefficient from
- * this expression.
- */
-
InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
};
- enum { ThisConstantIsPrivateInPlainObjectBase };
+ enum { IsPlainObjectBase = 0 };
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
@@ -278,7 +267,8 @@ template<typename Derived> class DenseBase
Derived& operator=(const ReturnByValue<OtherDerived>& func);
#ifndef EIGEN_PARSED_BY_DOXYGEN
- /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
+ /** Copies \a other into *this without evaluating other. \returns a reference to *this.
+ * \deprecated */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
@@ -287,27 +277,24 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC
CommaInitializer<Derived> operator<< (const Scalar& s);
+ // TODO: flagged() is temporarily disabled, as it seems useless now
template<unsigned int Added,unsigned int Removed>
- const Flagged<Derived, Added, Removed> flagged() const;
+ EIGEN_DEPRECATED
+ const Derived& flagged() const
+ { return derived(); }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
+ typedef Transpose<Derived> TransposeReturnType;
EIGEN_DEVICE_FUNC
- Eigen::Transpose<Derived> transpose();
+ TransposeReturnType transpose();
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
EIGEN_DEVICE_FUNC
ConstTransposeReturnType transpose() const;
EIGEN_DEVICE_FUNC
void transposeInPlace();
-#ifndef EIGEN_NO_DEBUG
- protected:
- template<typename OtherDerived>
- void checkTransposeAliasing(const OtherDerived& other) const;
- public:
-#endif
-
EIGEN_DEVICE_FUNC static const ConstantReturnType
Constant(Index rows, Index cols, const Scalar& value);
@@ -387,16 +374,17 @@ template<typename Derived> class DenseBase
// size types on MSVC.
return typename internal::eval<Derived>::type(derived());
}
-
+
/** swaps *this with the expression \a other.
*
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- void swap(const DenseBase<OtherDerived>& other,
- int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
+ void swap(const DenseBase<OtherDerived>& other)
{
- SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+ EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+ eigen_assert(rows()==other.rows() && cols()==other.cols());
+ call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
}
/** swaps *this with the matrix or array \a other.
@@ -406,10 +394,10 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC
void swap(PlainObjectBase<OtherDerived>& other)
{
- SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+ eigen_assert(rows()==other.rows() && cols()==other.cols());
+ call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
}
-
EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 4e986e875..a9e4dbaf9 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -97,8 +97,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
{
eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- return derived().coeff(row, col);
+ && col >= 0 && col < cols());
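+      // note: a temporary evaluator is constructed on every call; this is cheap
+      // for plain objects but can be costly for compound expressions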
+ return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
}
EIGEN_DEVICE_FUNC
@@ -117,7 +117,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
{
eigen_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
- return derived().coeff(row, col);
+ return coeff(row, col);
}
/** Short version: don't use this function, use
@@ -140,7 +140,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
coeff(Index index) const
{
eigen_internal_assert(index >= 0 && index < size());
- return derived().coeff(index);
+ return typename internal::evaluator<Derived>::type(derived()).coeff(index);
}
@@ -159,7 +159,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
eigen_assert(index >= 0 && index < size());
- return derived().coeff(index);
+ return coeff(index);
}
/** \returns the coefficient at given index.
@@ -177,7 +177,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
operator()(Index index) const
{
eigen_assert(index >= 0 && index < size());
- return derived().coeff(index);
+ return coeff(index);
}
/** equivalent to operator[](0). */
@@ -217,9 +217,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
{
- eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- return derived().template packet<LoadMode>(row,col);
+ eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+ return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(row,col);
}
@@ -245,7 +244,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
eigen_internal_assert(index >= 0 && index < size());
- return derived().template packet<LoadMode>(index);
+ return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(index);
}
protected:
@@ -325,8 +324,8 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
{
eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- return derived().coeffRef(row, col);
+ && col >= 0 && col < cols());
+ return typename internal::evaluator<Derived>::type(derived()).coeffRef(row,col);
}
EIGEN_DEVICE_FUNC
@@ -348,7 +347,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
{
eigen_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
- return derived().coeffRef(row, col);
+ return coeffRef(row, col);
}
@@ -372,7 +371,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
coeffRef(Index index)
{
eigen_internal_assert(index >= 0 && index < size());
- return derived().coeffRef(index);
+ return typename internal::evaluator<Derived>::type(derived()).coeffRef(index);
}
/** \returns a reference to the coefficient at given index.
@@ -389,7 +388,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
eigen_assert(index >= 0 && index < size());
- return derived().coeffRef(index);
+ return coeffRef(index);
}
/** \returns a reference to the coefficient at given index.
@@ -406,7 +405,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
operator()(Index index)
{
eigen_assert(index >= 0 && index < size());
- return derived().coeffRef(index);
+ return coeffRef(index);
}
/** equivalent to operator[](0). */
@@ -432,144 +431,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar&
w() { return (*this)[3]; }
-
- /** \internal
- * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
- * to ensure that a packet really starts there. This method is only available on expressions having the
- * PacketAccessBit.
- *
- * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
- * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
- * starting at an address which is a multiple of the packet size.
- */
-
- template<int StoreMode>
- EIGEN_STRONG_INLINE void writePacket
- (Index row, Index col, const typename internal::packet_traits<Scalar>::type& val)
- {
- eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- derived().template writePacket<StoreMode>(row,col,val);
- }
-
-
- /** \internal */
- template<int StoreMode>
- EIGEN_STRONG_INLINE void writePacketByOuterInner
- (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& val)
- {
- writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
- colIndexByOuterInner(outer, inner),
- val);
- }
-
- /** \internal
- * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
- * to ensure that a packet really starts there. This method is only available on expressions having the
- * PacketAccessBit and the LinearAccessBit.
- *
- * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
- * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
- * starting at an address which is a multiple of the packet size.
- */
- template<int StoreMode>
- EIGEN_STRONG_INLINE void writePacket
- (Index index, const typename internal::packet_traits<Scalar>::type& val)
- {
- eigen_internal_assert(index >= 0 && index < size());
- derived().template writePacket<StoreMode>(index,val);
- }
-
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-
- /** \internal Copies the coefficient at position (row,col) of other into *this.
- *
- * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
- * with usual assignments.
- *
- * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
- */
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
- {
- eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- derived().coeffRef(row, col) = other.derived().coeff(row, col);
- }
-
- /** \internal Copies the coefficient at the given index of other into *this.
- *
- * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
- * with usual assignments.
- *
- * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
- */
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
- {
- eigen_internal_assert(index >= 0 && index < size());
- derived().coeffRef(index) = other.derived().coeff(index);
- }
-
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
- {
- const Index row = rowIndexByOuterInner(outer,inner);
- const Index col = colIndexByOuterInner(outer,inner);
- // derived() is important here: copyCoeff() may be reimplemented in Derived!
- derived().copyCoeff(row, col, other);
- }
-
- /** \internal Copies the packet at position (row,col) of other into *this.
- *
- * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
- * with usual assignments.
- *
- * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
- */
-
- template<typename OtherDerived, int StoreMode, int LoadMode>
- EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
- {
- eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- derived().template writePacket<StoreMode>(row, col,
- other.derived().template packet<LoadMode>(row, col));
- }
-
- /** \internal Copies the packet at the given index of other into *this.
- *
- * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
- * with usual assignments.
- *
- * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
- */
-
- template<typename OtherDerived, int StoreMode, int LoadMode>
- EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
- {
- eigen_internal_assert(index >= 0 && index < size());
- derived().template writePacket<StoreMode>(index,
- other.derived().template packet<LoadMode>(index));
- }
-
- /** \internal */
- template<typename OtherDerived, int StoreMode, int LoadMode>
- EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
- {
- const Index row = rowIndexByOuterInner(outer,inner);
- const Index col = colIndexByOuterInner(outer,inner);
- // derived() is important here: copyCoeff() may be reimplemented in Derived!
- derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other);
- }
-#endif
-
};
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 59f515495..852648639 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -130,7 +130,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
public:
EIGEN_DEVICE_FUNC DenseStorage() {}
EIGEN_DEVICE_FUNC
- DenseStorage(internal::constructor_without_unaligned_array_assert)
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()) {}
EIGEN_DEVICE_FUNC
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
@@ -155,7 +155,7 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
{
public:
EIGEN_DEVICE_FUNC DenseStorage() {}
- EIGEN_DEVICE_FUNC DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
@@ -186,7 +186,7 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
DenseIndex m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
- DenseStorage(internal::constructor_without_unaligned_array_assert)
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
DenseStorage& operator=(const DenseStorage& other)
@@ -217,7 +217,7 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
DenseIndex m_rows;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
- DenseStorage(internal::constructor_without_unaligned_array_assert)
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
DenseStorage& operator=(const DenseStorage& other)
@@ -246,7 +246,7 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
DenseIndex m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
- DenseStorage(internal::constructor_without_unaligned_array_assert)
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
DenseStorage& operator=(const DenseStorage& other)
@@ -276,7 +276,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
DenseIndex m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
- DenseStorage(internal::constructor_without_unaligned_array_assert)
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(0), m_rows(0), m_cols(0) {}
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
@@ -350,7 +350,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
DenseIndex m_cols;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
- DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
@@ -416,7 +416,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
DenseIndex m_rows;
public:
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
- DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
{ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
DenseStorage(const DenseStorage& other)
diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index b160479ab..33b82f90f 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -52,8 +52,7 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
MaxColsAtCompileTime = 1,
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
- Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
- CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
+ Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
OuterStrideAtCompileTime = 0
@@ -71,17 +70,15 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
EIGEN_DEVICE_FUNC
- inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+ explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
EIGEN_DEVICE_FUNC
inline Index rows() const
{
- EIGEN_USING_STD_MATH(min);
- return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
- : (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
-
+ return m_index.value()<0 ? numext::mini(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
+ : numext::mini(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
}
EIGEN_DEVICE_FUNC
@@ -149,14 +146,14 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
}
EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename MatrixType::Nested>::type&
+ inline const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
}
EIGEN_DEVICE_FUNC
- int index() const
+ inline Index index() const
{
return m_index.value();
}
@@ -173,7 +170,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
- // triger a compile time error is someone try to call packet
+ // trigger a compile-time error if someone tries to call packet
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
};
@@ -190,7 +187,7 @@ template<typename Derived>
inline typename MatrixBase<Derived>::DiagonalReturnType
MatrixBase<Derived>::diagonal()
{
- return derived();
+ return DiagonalReturnType(derived());
}
/** This is the const version of diagonal(). */
@@ -213,18 +210,18 @@ MatrixBase<Derived>::diagonal() const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
-inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<DynamicIndex>::Type
+inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
MatrixBase<Derived>::diagonal(Index index)
{
- return typename DiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+ return DiagonalDynamicIndexReturnType(derived(), index);
}
/** This is the const version of diagonal(Index). */
template<typename Derived>
-inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<DynamicIndex>::Type
+inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
MatrixBase<Derived>::diagonal(Index index) const
{
- return typename ConstDiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+ return ConstDiagonalDynamicIndexReturnType(derived(), index);
}
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -239,20 +236,20 @@ MatrixBase<Derived>::diagonal(Index index) const
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
-template<int Index>
-inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
+template<int Index_>
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
MatrixBase<Derived>::diagonal()
{
- return derived();
+ return typename DiagonalIndexReturnType<Index_>::Type(derived());
}
/** This is the const version of diagonal<int>(). */
template<typename Derived>
-template<int Index>
-inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
+template<int Index_>
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
MatrixBase<Derived>::diagonal() const
{
- return derived();
+ return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
}
} // end namespace Eigen
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 96b65483d..49b9b7925 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -30,7 +30,7 @@ class DiagonalBase : public EigenBase<Derived>
MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
IsVectorAtCompileTime = 0,
- Flags = 0
+ Flags = NoPreferredStorageOrderBit
};
typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
@@ -44,18 +44,7 @@ class DiagonalBase : public EigenBase<Derived>
EIGEN_DEVICE_FUNC
DenseMatrixType toDenseMatrix() const { return derived(); }
- template<typename DenseDerived>
- EIGEN_DEVICE_FUNC
- void evalTo(MatrixBase<DenseDerived> &other) const;
- template<typename DenseDerived>
- EIGEN_DEVICE_FUNC
- void addTo(MatrixBase<DenseDerived> &other) const
- { other.diagonal() += diagonal(); }
- template<typename DenseDerived>
- EIGEN_DEVICE_FUNC
- void subTo(MatrixBase<DenseDerived> &other) const
- { other.diagonal() -= diagonal(); }
-
+
EIGEN_DEVICE_FUNC
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
EIGEN_DEVICE_FUNC
@@ -66,44 +55,37 @@ class DiagonalBase : public EigenBase<Derived>
EIGEN_DEVICE_FUNC
inline Index cols() const { return diagonal().size(); }
- /** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
- */
template<typename MatrixDerived>
EIGEN_DEVICE_FUNC
- const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
+ const Product<Derived,MatrixDerived,LazyProduct>
operator*(const MatrixBase<MatrixDerived> &matrix) const
{
- return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
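+    // build a lazy product expression; its actual evaluation is deferred to
+    // the product evaluators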
+ return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
}
+ typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
EIGEN_DEVICE_FUNC
- inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
+ inline const InverseReturnType
inverse() const
{
- return diagonal().cwiseInverse();
+ return InverseReturnType(diagonal().cwiseInverse());
}
+ typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > ScalarMultipleReturnType;
EIGEN_DEVICE_FUNC
- inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+ inline const ScalarMultipleReturnType
operator*(const Scalar& scalar) const
{
- return diagonal() * scalar;
+ return ScalarMultipleReturnType(diagonal() * scalar);
}
EIGEN_DEVICE_FUNC
- friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+ friend inline const ScalarMultipleReturnType
operator*(const Scalar& scalar, const DiagonalBase& other)
{
- return other.diagonal() * scalar;
+ return ScalarMultipleReturnType(other.diagonal() * scalar);
}
};
-template<typename Derived>
-template<typename DenseDerived>
-void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
-{
- other.setZero();
- other.diagonal() = diagonal();
-}
#endif
/** \class DiagonalMatrix
@@ -125,10 +107,10 @@ struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
: traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
- typedef Dense StorageKind;
+ typedef DiagonalShape StorageKind;
typedef DenseIndex Index;
enum {
- Flags = LvalueBit
+ Flags = LvalueBit | NoPreferredStorageOrderBit
};
};
}
@@ -164,7 +146,7 @@ class DiagonalMatrix
/** Constructs a diagonal matrix with given dimension */
EIGEN_DEVICE_FUNC
- inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+ explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
/** 2D constructor. */
EIGEN_DEVICE_FUNC
@@ -249,13 +231,14 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
typedef _DiagonalVectorType DiagonalVectorType;
typedef typename DiagonalVectorType::Scalar Scalar;
typedef typename DiagonalVectorType::Index Index;
- typedef typename DiagonalVectorType::StorageKind StorageKind;
+ typedef DiagonalShape StorageKind;
+ typedef typename traits<DiagonalVectorType>::XprKind XprKind;
enum {
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
- MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
- MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
- Flags = traits<DiagonalVectorType>::Flags & LvalueBit
+ MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+ MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+ Flags = (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
};
};
}
@@ -272,7 +255,7 @@ class DiagonalWrapper
/** Constructor from expression of diagonal coefficients to wrap. */
EIGEN_DEVICE_FUNC
- inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+ explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
EIGEN_DEVICE_FUNC
@@ -295,7 +278,7 @@ template<typename Derived>
inline const DiagonalWrapper<const Derived>
MatrixBase<Derived>::asDiagonal() const
{
- return derived();
+ return DiagonalWrapper<const Derived>(derived());
}
/** \returns true if *this is approximately equal to a diagonal matrix,
@@ -326,6 +309,33 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
return true;
}
+namespace internal {
+
+template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
+
+struct Diagonal2Dense {};
+
+template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
+
+// Diagonal matrix to Dense assignment
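+// e.g. "dense = diag" performs dst.setZero() followed by dst.diagonal() = src.diagonal(),
+// whereas "dense += diag" and "dense -= diag" only touch dst.diagonal()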
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar>
+{
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ dst.setZero();
+ dst.diagonal() = src.diagonal();
+ }
+
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
+ { dst.diagonal() += src.diagonal(); }
+
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
+ { dst.diagonal() -= src.diagonal(); }
+};
+
+} // namespace internal
+
} // end namespace Eigen
#endif // EIGEN_DIAGONALMATRIX_H
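
For context, a minimal sketch of what the new Diagonal2Dense assignment kernels mean at the user level. This is illustrative only, with hypothetical variable names, and assumes the evaluator branch of this merge:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      Vector3d d(1.0, 2.0, 3.0);
      Matrix3d M = Matrix3d::Constant(5.0);

      M  = d.asDiagonal();  // assign_op kernel: setZero(), then copy the diagonal
      M += d.asDiagonal();  // add_assign_op kernel: touches M.diagonal() only
      M -= d.asDiagonal();  // sub_assign_op kernel: likewise, no full traversal
    }

Routing this through Assignment<> instead of evalTo() means plain and compound assignments all dispatch on the same (destination shape, source shape, functor) triple.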
diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h
index c03a0c2e1..d372b938f 100644
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -13,116 +13,14 @@
namespace Eigen {
-namespace internal {
-template<typename MatrixType, typename DiagonalType, int ProductOrder>
-struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
- : traits<MatrixType>
-{
- typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime,
- MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-
- _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
- _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
- ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
- _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
- // FIXME currently we need same types, but in the future the next rule should be the one
- //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
- _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
- _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
-
- Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
- CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
- };
-};
-}
-
-template<typename MatrixType, typename DiagonalType, int ProductOrder>
-class DiagonalProduct : internal::no_assignment_operator,
- public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
-{
- public:
-
- typedef MatrixBase<DiagonalProduct> Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct)
-
- inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
- : m_matrix(matrix), m_diagonal(diagonal)
- {
- eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
- }
-
- EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
- EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
-
- EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
- {
- return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col);
- }
-
- EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
- {
- enum {
- StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
- };
- return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
- {
- enum {
- StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
- };
- const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
- return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
- ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
- ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
- {
- enum {
- StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
- };
- return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
- }
-
- protected:
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
- {
- return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
- internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
- {
- enum {
- InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
- DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
- };
- return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
- m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
- }
-
- typename MatrixType::Nested m_matrix;
- typename DiagonalType::Nested m_diagonal;
-};
-
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
*/
template<typename Derived>
template<typename DiagonalDerived>
-inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+inline const Product<Derived, DiagonalDerived, LazyProduct>
MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
{
- return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), a_diagonal.derived());
+ return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
}
} // end namespace Eigen
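
With the DiagonalProduct expression class gone, dense-times-diagonal is just another lazy Product handled by the generic product evaluators. A small usage sketch, names hypothetical:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd A = MatrixXd::Random(4, 4);
      VectorXd d = VectorXd::LinSpaced(4, 1.0, 4.0);

      MatrixXd B = A * d.asDiagonal();  // scales the columns of A
      MatrixXd C = d.asDiagonal() * A;  // scales the rows, same machinery
    }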
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index db16e4acc..68e9c2660 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -113,8 +113,7 @@ template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
- typedef typename internal::nested<Derived>::type Nested;
- typedef typename internal::remove_reference<Nested>::type _Nested;
+ typedef typename internal::nested_eval<Derived,2>::type _Nested;
_Nested n(derived());
return n / n.norm();
}
@@ -206,8 +205,8 @@ template<typename OtherDerived>
bool MatrixBase<Derived>::isOrthogonal
(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
{
- typename internal::nested<Derived,2>::type nested(derived());
- typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
+ typename internal::nested_eval<Derived,2>::type nested(derived());
+ typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
}
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 1a577c2dc..52b66e6dc 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -121,7 +121,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
{
- other.derived().evalTo(derived());
+ call_assignment(derived(), other.derived());
return derived();
}
@@ -129,7 +129,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
{
- other.derived().addTo(derived());
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
return derived();
}
@@ -137,7 +137,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
{
- other.derived().subTo(derived());
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
return derived();
}
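
These three operators now share a single dispatch point: call_assignment picks an internal::Assignment kernel from the destination shape, the source shape, and the functor. A hedged sketch with a non-dense right-hand side, assuming permutation-to-dense assignment is wired up on this branch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      PermutationMatrix<3> P;
      P.setIdentity();
      Matrix3d M;
      // Previously P.evalTo(M); now call_assignment(M, P) selects the kernel
      // registered for this (dst shape, src shape, functor) combination.
      M = P;
    }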
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index 1f2955fc1..2e2a50be5 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -48,39 +48,39 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
typedef typename ExpressionType::InnerIterator InnerIterator;
- inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
+ explicit inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
- inline Index rows() const { return m_matrix.rows(); }
- inline Index cols() const { return m_matrix.cols(); }
- inline Index outerStride() const { return m_matrix.outerStride(); }
- inline Index innerStride() const { return m_matrix.innerStride(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
- inline CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(row, col);
}
- inline CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(index);
}
- inline const Scalar& coeffRef(Index row, Index col) const
+ EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
- inline const Scalar& coeffRef(Index index) const
+ EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
- inline Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(row, col);
}
- inline Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(index);
}
@@ -109,13 +109,13 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
}
- const ExpressionType& _expression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC const ExpressionType& _expression() const { return m_matrix; }
template<typename OtherDerived>
- typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
+ EIGEN_DEVICE_FUNC typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
- void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
+ EIGEN_DEVICE_FUNC void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
protected:
ExpressionTypeNested m_matrix;
@@ -132,7 +132,7 @@ template<unsigned int Added,unsigned int Removed>
inline const Flagged<Derived, Added, Removed>
DenseBase<Derived>::flagged() const
{
- return derived();
+ return Flagged<Derived, Added, Removed>(derived());
}
} // end namespace Eigen
diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h
index 807c7a293..7b08b45e6 100644
--- a/Eigen/src/Core/ForceAlignedAccess.h
+++ b/Eigen/src/Core/ForceAlignedAccess.h
@@ -39,29 +39,29 @@ template<typename ExpressionType> class ForceAlignedAccess
typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
- inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+ EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
- inline Index rows() const { return m_expression.rows(); }
- inline Index cols() const { return m_expression.cols(); }
- inline Index outerStride() const { return m_expression.outerStride(); }
- inline Index innerStride() const { return m_expression.innerStride(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
- inline const CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
- inline Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_expression.const_cast_derived().coeffRef(row, col);
}
- inline const CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
- inline Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_expression.const_cast_derived().coeffRef(index);
}
@@ -90,7 +90,7 @@ template<typename ExpressionType> class ForceAlignedAccess
m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
}
- operator const ExpressionType&() const { return m_expression; }
+ EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
protected:
const ExpressionType& m_expression;
@@ -127,7 +127,7 @@ template<bool Enable>
inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
MatrixBase<Derived>::forceAlignedAccessIf() const
{
- return derived();
+ return derived(); // FIXME This should not work but apparently is never used
}
/** \returns an expression of *this with forced aligned access if \a Enable is true.
@@ -138,7 +138,7 @@ template<bool Enable>
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
MatrixBase<Derived>::forceAlignedAccessIf()
{
- return derived();
+ return derived(); // FIXME This should not work but apparently is never used
}
} // end namespace Eigen
diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h
index f9a88dd3c..3e403a09d 100644
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -22,10 +22,9 @@ struct isApprox_selector
EIGEN_DEVICE_FUNC
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
{
- EIGEN_USING_STD_MATH(min);
- typename internal::nested<Derived,2>::type nested(x);
- typename internal::nested<OtherDerived,2>::type otherNested(y);
- return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+ typename internal::nested_eval<Derived,2>::type nested(x);
+ typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
+ return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
}
};
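
The isApprox criterion itself is unchanged; only the min is now taken with the macro-proof, device-safe numext::mini. For reference, a.isApprox(b, p) still tests (a - b).squaredNorm() <= p^2 * min(a.squaredNorm(), b.squaredNorm()):

    #include <Eigen/Dense>
    #include <iostream>
    using namespace Eigen;

    int main()
    {
      Vector2d a(1.0, 0.0), b(1.0, 1e-12);
      std::cout << a.isApprox(b) << "\n";  // prints 1: the difference is tiny
                                           // relative to both squared norms
    }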
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 9d3d5562c..81750722c 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -13,28 +13,6 @@
namespace Eigen {
-/** \class GeneralProduct
- * \ingroup Core_Module
- *
- * \brief Expression of the product of two general matrices or vectors
- *
- * \param LhsNested the type used to store the left-hand side
- * \param RhsNested the type used to store the right-hand side
- * \param ProductMode the type of the product
- *
- * This class represents an expression of the product of two general matrices.
- * We call a general matrix, a dense matrix with full storage. For instance,
- * This excludes triangular, selfadjoint, and sparse matrices.
- * It is the return type of the operator* between general matrices. Its template
- * arguments are determined automatically by ProductReturnType. Therefore,
- * GeneralProduct should never be used direclty. To determine the result type of a
- * function which involves a matrix product, use ProductReturnType::Type.
- *
- * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
- */
-template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
-class GeneralProduct;
-
enum {
Large = 2,
Small = 3
@@ -59,14 +37,14 @@ template<typename Lhs, typename Rhs> struct product_type
typedef typename remove_all<Lhs>::type _Lhs;
typedef typename remove_all<Rhs>::type _Rhs;
enum {
- MaxRows = _Lhs::MaxRowsAtCompileTime,
- Rows = _Lhs::RowsAtCompileTime,
- MaxCols = _Rhs::MaxColsAtCompileTime,
- Cols = _Rhs::ColsAtCompileTime,
- MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
- _Rhs::MaxRowsAtCompileTime),
- Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
- _Rhs::RowsAtCompileTime)
+ MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
+ Rows = traits<_Lhs>::RowsAtCompileTime,
+ MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
+ Cols = traits<_Rhs>::ColsAtCompileTime,
+ MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
+ traits<_Rhs>::MaxRowsAtCompileTime),
+ Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
+ traits<_Rhs>::RowsAtCompileTime)
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
@@ -81,7 +59,8 @@ private:
public:
enum {
- value = selector::ret
+ value = selector::ret,
+ ret = selector::ret
};
#ifdef EIGEN_DEBUG_PRODUCT
static void debug()
@@ -97,6 +76,31 @@ public:
#endif
};
+// template<typename Lhs, typename Rhs> struct product_tag
+// {
+// private:
+//
+// typedef typename remove_all<Lhs>::type _Lhs;
+// typedef typename remove_all<Rhs>::type _Rhs;
+// enum {
+// Rows = _Lhs::RowsAtCompileTime,
+// Cols = _Rhs::ColsAtCompileTime,
+// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
+// };
+//
+// enum {
+// rows_select = Rows==1 ? int(Rows) : int(Large),
+// cols_select = Cols==1 ? int(Cols) : int(Large),
+// depth_select = Depth==1 ? int(Depth) : int(Large)
+// };
+// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+//
+// public:
+// enum {
+// ret = selector::ret
+// };
+//
+// };
/* The following allows to select the kind of product at compile time
* based on the three dimensions of the product.
@@ -127,54 +131,6 @@ template<> struct product_type_selector<Large,Large,Small> { enum
} // end namespace internal
-/** \class ProductReturnType
- * \ingroup Core_Module
- *
- * \brief Helper class to get the correct and optimized returned type of operator*
- *
- * \param Lhs the type of the left-hand side
- * \param Rhs the type of the right-hand side
- * \param ProductMode the type of the product (determined automatically by internal::product_mode)
- *
- * This class defines the typename Type representing the optimized product expression
- * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
- * is the recommended way to define the result type of a function returning an expression
- * which involve a matrix product. The class Product should never be
- * used directly.
- *
- * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
- */
-template<typename Lhs, typename Rhs, int ProductType>
-struct ProductReturnType
-{
- // TODO use the nested type to reduce instanciations ????
-// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
-
- typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
-{
- typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
- typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
- typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{
- typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
- typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
- typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
-};
-
-// this is a workaround for sun CC
-template<typename Lhs, typename Rhs>
-struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{};
-
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
@@ -186,119 +142,10 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
- : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, InnerProduct>
- : internal::no_assignment_operator,
- public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
-{
- typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
- public:
- GeneralProduct(const Lhs& lhs, const Rhs& rhs)
- {
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
- Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
- }
-
- /** Convertion to scalar */
- operator const typename Base::Scalar() const {
- return Base::coeff(0,0);
- }
-};
-
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
-namespace internal {
-
-// Column major
-template<typename ProductType, typename Dest, typename Func>
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
-{
- typedef typename Dest::Index Index;
- // FIXME make sure lhs is sequentially stored
- // FIXME not very good if rhs is real and lhs complex while alpha is real too
- const Index cols = dest.cols();
- for (Index j=0; j<cols; ++j)
- func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs());
-}
-
-// Row major
-template<typename ProductType, typename Dest, typename Func>
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
- typedef typename Dest::Index Index;
- // FIXME make sure rhs is sequentially stored
- // FIXME not very good if lhs is real and rhs complex while alpha is real too
- const Index rows = dest.rows();
- for (Index i=0; i<rows; ++i)
- func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs());
-}
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, OuterProduct>
- : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
-{
- template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
-
- public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
- GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
- {
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
- }
-
- struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
- struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
- struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
- struct adds {
- Scalar m_scale;
- adds(const Scalar& s) : m_scale(s) {}
- template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
- dst.const_cast_derived() += m_scale * src;
- }
- };
-
- template<typename Dest>
- inline void evalTo(Dest& dest) const {
- internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
- }
-
- template<typename Dest>
- inline void addTo(Dest& dest) const {
- internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
- }
-
- template<typename Dest>
- inline void subTo(Dest& dest) const {
- internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
- }
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
- {
- internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
- }
-};
-
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
@@ -312,60 +159,13 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
*/
namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
-{};
-
template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_selector;
+struct gemv_dense_sense_selector;
} // end namespace internal
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemvProduct>
- : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
-{
- public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
- typedef typename Lhs::Scalar LhsScalar;
- typedef typename Rhs::Scalar RhsScalar;
-
- GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs)
- {
-// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
-// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
- }
-
- enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
- typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
-
- template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
- {
- eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
- internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
- bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
- }
-};
-
namespace internal {
-// The vector is on the left => transposition
-template<int StorageOrder, bool BlasCompatible>
-struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
-{
- template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
- {
- Transpose<Dest> destT(dest);
- enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
- gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
- ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
- (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
- }
-};
-
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
template<typename Scalar,int Size,int MaxSize>
@@ -402,27 +202,43 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
#endif
};
-template<> struct gemv_selector<OnTheRight,ColMajor,true>
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ Transpose<Dest> destT(dest);
+ enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+ gemv_dense_sense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+ ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
+ }
+};
+
+template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
{
- template<typename ProductType, typename Dest>
- static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ template<typename Lhs, typename Rhs, typename Dest>
+ static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
- typedef typename ProductType::Index Index;
- typedef typename ProductType::LhsScalar LhsScalar;
- typedef typename ProductType::RhsScalar RhsScalar;
- typedef typename ProductType::Scalar ResScalar;
- typedef typename ProductType::RealScalar RealScalar;
- typedef typename ProductType::ActualLhsType ActualLhsType;
- typedef typename ProductType::ActualRhsType ActualRhsType;
- typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
- typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+ typedef typename Dest::Index Index;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+ typedef typename Dest::RealScalar RealScalar;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
- ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
- ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+ ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+ ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
- * RhsBlasTraits::extractScalarFactor(prod.rhs());
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -477,34 +293,35 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
}
};
-template<> struct gemv_selector<OnTheRight,RowMajor,true>
+template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true>
{
- template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
- typedef typename ProductType::LhsScalar LhsScalar;
- typedef typename ProductType::RhsScalar RhsScalar;
- typedef typename ProductType::Scalar ResScalar;
- typedef typename ProductType::Index Index;
- typedef typename ProductType::ActualLhsType ActualLhsType;
- typedef typename ProductType::ActualRhsType ActualRhsType;
- typedef typename ProductType::_ActualRhsType _ActualRhsType;
- typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
- typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-
- typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
- typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
- * RhsBlasTraits::extractScalarFactor(prod.rhs());
+ typedef typename Dest::Index Index;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+ typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
- DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+ DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
};
- gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+ gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@@ -515,7 +332,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
Index size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
- Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+ Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
@@ -530,29 +347,29 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
}
};
-template<> struct gemv_selector<OnTheRight,ColMajor,false>
+template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,false>
{
- template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
- const Index size = prod.rhs().rows();
+ const Index size = rhs.rows();
for(Index k=0; k<size; ++k)
- dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+ dest += (alpha*rhs.coeff(k)) * lhs.col(k);
}
};
-template<> struct gemv_selector<OnTheRight,RowMajor,false>
+template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,false>
{
- template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
- const Index rows = prod.rows();
+ const Index rows = dest.rows();
for(Index i=0; i<rows; ++i)
- dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+ dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(rhs.transpose())).sum();
}
};
@@ -570,7 +387,6 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
*/
#ifndef __CUDACC__
-#ifdef EIGEN_TEST_EVALUATORS
template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
@@ -601,39 +417,9 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
return Product<Derived, OtherDerived>(derived(), other.derived());
}
-#else
-template<typename Derived>
-template<typename OtherDerived>
-inline const typename ProductReturnType<Derived, OtherDerived>::Type
-MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
-{
- // A note regarding the function declaration: In MSVC, this function will sometimes
- // not be inlined since DenseStorage is an unwindable object for dynamic
- // matrices and product types are holding a member to store the result.
- // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
- enum {
- ProductIsValid = Derived::ColsAtCompileTime==Dynamic
- || OtherDerived::RowsAtCompileTime==Dynamic
- || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
- AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
- SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
- };
- // note to the lost user:
- // * for a dot product use: v1.dot(v2)
- // * for a coeff-wise product use: v1.cwiseProduct(v2)
- EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
- INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
- EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
- INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
- EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-#ifdef EIGEN_DEBUG_PRODUCT
- internal::product_type<Derived,OtherDerived>::debug();
-#endif
- return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
-}
-#endif
-#endif
+#endif // __CUDACC__
+
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expressions: the coefficients of the product will be
@@ -647,7 +433,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
*/
template<typename Derived>
template<typename OtherDerived>
-const typename LazyProductReturnType<Derived,OtherDerived>::Type
+const Product<Derived,OtherDerived,LazyProduct>
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
enum {
@@ -666,7 +452,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
- return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+ return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
}
} // end namespace Eigen
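
After this hunk every dense product expression is a Product<>, and the old ProductReturnType/GeneralProduct plumbing is gone. What that looks like from user code, as a sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd A = MatrixXd::Random(3, 3), B = MatrixXd::Random(3, 3);

      MatrixXd C = A * B;             // Product<Derived,OtherDerived>, evaluated on assignment
      C.noalias() = A * B;            // promises no aliasing, so no temporary is created
      MatrixXd D = A.lazyProduct(B);  // Product<...,LazyProduct>: coefficient-based, no GEMM
    }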
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 3ef3475c7..8759cd06c 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -129,12 +129,12 @@ pdiv(const Packet& a,
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pmin(const Packet& a,
- const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
+ const Packet& b) { return numext::mini(a, b); }
/** \internal \returns the max of \a a and \a b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pmax(const Packet& a,
- const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
+ const Packet& b) { return numext::maxi(a, b); }
/** \internal \returns the absolute value of \a a */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -245,8 +245,8 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
/** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void prefetch(const Scalar* addr)
{
-#if !defined(_MSC_VER)
-__builtin_prefetch(addr);
+#if !EIGEN_COMP_MSVC
+ __builtin_prefetch(addr);
#endif
}
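
The pmin/pmax change is more than cosmetic: numext::mini/maxi are EIGEN_DEVICE_FUNC and immune to min/max macros from <windows.h>, so generic packet code can now compile for CUDA devices as well. A sketch of a helper built on these primitives; clamp01 is a made-up name, and a plain scalar stands in for a SIMD packet:

    #include <Eigen/Core>

    // Clamp to [0,1] via the generic packet primitives; valid for any Packet
    // type, including plain scalars, on host or device.
    template <typename Packet>
    EIGEN_DEVICE_FUNC Packet clamp01(const Packet& p)
    {
      using namespace Eigen::internal;
      return pmax(pset1<Packet>(0), pmin(pset1<Packet>(1), p));
    }

    int main() { float r = clamp01(1.5f); (void)r; }  // r == 1.0f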
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 2067a2a6e..ee67b7d3c 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -15,7 +15,7 @@
template<typename Derived> \
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
NAME(const Eigen::ArrayBase<Derived>& x) { \
- return x.derived(); \
+ return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
}
#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
@@ -30,7 +30,7 @@
{ \
static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
{ \
- return x.derived(); \
+ return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \
} \
};
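
Same theme as elsewhere in this merge: unary expression types are gaining explicit constructors, so these macros can no longer rely on an implicit conversion from x.derived(). A hedged expansion of what the first macro now generates for one function; my_cos is a stand-in name:

    #include <Eigen/Dense>
    using namespace Eigen;

    template <typename Derived>
    inline const CwiseUnaryOp<internal::scalar_cos_op<typename Derived::Scalar>, const Derived>
    my_cos(const ArrayBase<Derived>& x)
    {
      typedef CwiseUnaryOp<internal::scalar_cos_op<typename Derived::Scalar>,
                           const Derived> ReturnType;
      return ReturnType(x.derived());  // explicit; `return x.derived();` would no longer compile
    }

    int main() { ArrayXd a = ArrayXd::Random(4); ArrayXd c = my_cos(a); }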
diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h
new file mode 100644
index 000000000..f3b0dff87
--- /dev/null
+++ b/Eigen/src/Core/Inverse.h
@@ -0,0 +1,129 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INVERSE_H
+#define EIGEN_INVERSE_H
+
+namespace Eigen {
+
+// TODO move the general declaration to Core, and rename this file to DenseInverseImpl.h, or something like this...
+
+template<typename XprType,typename StorageKind> class InverseImpl;
+
+namespace internal {
+
+template<typename XprType>
+struct traits<Inverse<XprType> >
+ : traits<typename XprType::PlainObject>
+{
+ typedef typename XprType::PlainObject PlainObject;
+ typedef traits<PlainObject> BaseTraits;
+ enum {
+ Flags = BaseTraits::Flags & RowMajorBit
+ };
+};
+
+} // end namespace internal
+
+/** \class Inverse
+ *
+ * \brief Expression of the inverse of another expression
+ *
+ * \tparam XprType the type of the expression we are taking the inverse of
+ *
+ * This class represents an abstract expression of A.inverse()
+ * and most of the time this is the only way it is used.
+ *
+ */
+template<typename XprType>
+class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
+{
+public:
+ typedef typename XprType::Index Index;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename internal::nested<XprType>::type XprTypeNested;
+ typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
+
+ explicit Inverse(const XprType &xpr)
+ : m_xpr(xpr)
+ {}
+
+ EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+
+ EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
+
+protected:
+ XprTypeNested m_xpr;
+};
+
+/** \internal
+ * Specialization of the Inverse expression for dense expressions.
+ * Direct access to the coefficients is discarded.
+ * FIXME this intermediate class is probably not needed anymore.
+ */
+template<typename XprType>
+class InverseImpl<XprType,Dense>
+ : public MatrixBase<Inverse<XprType> >
+{
+ typedef Inverse<XprType> Derived;
+
+public:
+
+ typedef MatrixBase<Derived> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
+
+private:
+
+ Scalar coeff(Index row, Index col) const;
+ Scalar coeff(Index i) const;
+};
+
+namespace internal {
+
+/** \internal
+ * \brief Default evaluator for Inverse expression.
+ *
+ * This default evaluator for Inverse expressions simply evaluates the inverse into a temporary
+ * by a call to internal::call_assignment_no_alias.
+ * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
+ * their own nested expression.
+ *
+ * \sa class Inverse
+ */
+template<typename ArgType>
+struct unary_evaluator<Inverse<ArgType> >
+ : public evaluator<typename Inverse<ArgType>::PlainObject>::type
+{
+ typedef Inverse<ArgType> InverseType;
+ typedef typename InverseType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator<InverseType> type;
+ typedef evaluator<InverseType> nestedType;
+
+ enum { Flags = Base::Flags | EvalBeforeNestingBit };
+
+ unary_evaluator(const InverseType& inv_xpr)
+ : m_result(inv_xpr.rows(), inv_xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ internal::call_assignment_no_alias(m_result, inv_xpr);
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_H
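
In practice the new Inverse node makes A.inverse() a deferred expression: nothing is computed until assignment, and by default the unary_evaluator above materializes the result into a PlainObject temporary. A sketch, assuming inverse() returns Inverse<> on this branch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      Matrix3d A = Matrix3d::Random();
      Vector3d b = Vector3d::Random();

      Matrix3d Ainv = A.inverse();   // evaluator computes the inverse into a temporary
      Vector3d x = A.inverse() * b;  // same path; a dedicated Assignment kernel could
                                     // instead write straight into the destination
    }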
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index ced1b76ba..098f1c096 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -79,22 +79,9 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
- HasNoInnerStride = InnerStrideAtCompileTime == 1,
- HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
- HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
- IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
- KeepsPacketAccess = bool(HasNoInnerStride)
- && ( bool(IsDynamicSize)
- || HasNoOuterStride
- || ( OuterStrideAtCompileTime!=Dynamic
- && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
- Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
- Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
- ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
- Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
- Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
+ Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
private:
enum { Options }; // Expressions don't have Options
@@ -135,7 +122,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
* \param a_stride optional Stride object, passing the strides.
*/
EIGEN_DEVICE_FUNC
- inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
+ explicit inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
: Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
{
PlainObjectType::Base::_check_template_params();
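
Making this constructor explicit closes an implicit pointer-to-Map conversion. A sketch of what still compiles and what no longer does, buffer name hypothetical:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      double buf[9] = {0};
      Map<Matrix3d> M(buf);       // fine: direct initialization
      // Map<Matrix3d> M2 = buf;  // ill-formed now that the constructor is explicit
      M.setIdentity();            // writes through to buf
    }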
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index e8ecb175b..1589cbaae 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -12,7 +12,7 @@
#define EIGEN_MAPBASE_H
#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
- EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+ EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
namespace Eigen {
@@ -85,7 +85,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
*
* \sa innerStride(), outerStride()
*/
- inline const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
EIGEN_DEVICE_FUNC
inline const Scalar& coeff(Index rowId, Index colId) const
@@ -128,7 +128,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
- inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+ explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
checkSanity();
@@ -161,11 +161,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
- EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
- internal::inner_stride_at_compile_time<Derived>::ret==1),
- PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
- eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
- && "data is not aligned");
+ eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
}
PointerType m_data;
@@ -176,6 +172,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
template<typename Derived> class MapBase<Derived, WriteAccessors>
: public MapBase<Derived, ReadOnlyAccessors>
{
+ typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
public:
typedef MapBase<Derived, ReadOnlyAccessors> Base;
@@ -243,11 +240,13 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
EIGEN_DEVICE_FUNC
Derived& operator=(const MapBase& other)
{
- Base::Base::operator=(other);
+ ReadOnlyMapBase::Base::operator=(other);
return derived();
}
- using Base::Base::operator=;
+ // In theory we could simply refer to Base::Base::operator=, but MSVC does not like Base::Base,
+ // see bugs 821 and 920.
+ using ReadOnlyMapBase::Base::operator=;
};
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index e9fed2e52..16ad2dc7e 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -14,7 +14,7 @@ namespace Eigen {
// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
-#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500
+#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
long abs(long x) { return (labs(x)); }
double abs(double x) { return (fabs(x)); }
float abs(float x) { return (fabsf(x)); }
@@ -360,50 +360,31 @@ inline NewType cast(const OldType& x)
}
/****************************************************************************
-* Implementation of atanh2 *
+* Implementation of log1p *
****************************************************************************/
template<typename Scalar>
-struct atanh2_impl
+struct log1p_impl
{
- static inline Scalar run(const Scalar& x, const Scalar& r)
+ static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
- #if (__cplusplus >= 201103L) && !defined(__CYGWIN__)
+ // Let's be conservative and enable the default C++11 implementation only if we are sure it exists
+ #if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
+ && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)
using std::log1p;
- return log1p(2 * x / (r - x)) / 2;
+ return log1p(x);
#else
- using std::abs;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
using std::log;
- using std::sqrt;
- Scalar z = x / r;
- if (r == 0 || abs(z) > sqrt(NumTraits<Scalar>::epsilon()))
- return log((r + x) / (r - x)) / 2;
- else
- return z + z*z*z / 3;
+ Scalar x1p = RealScalar(1) + x;
+ return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
#endif
}
};
-template<typename RealScalar>
-struct atanh2_impl<std::complex<RealScalar> >
-{
- typedef std::complex<RealScalar> Scalar;
- static inline Scalar run(const Scalar& x, const Scalar& r)
- {
- using std::log;
- using std::norm;
- using std::sqrt;
- Scalar z = x / r;
- if (r == Scalar(0) || norm(z) > NumTraits<RealScalar>::epsilon())
- return RealScalar(0.5) * log((r + x) / (r - x));
- else
- return z + z*z*z / RealScalar(3);
- }
-};
-
template<typename Scalar>
-struct atanh2_retval
+struct log1p_retval
{
typedef Scalar type;
};
@@ -591,6 +572,22 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
****************************************************************************/
namespace numext {
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+inline T mini(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(min);
+ return min EIGEN_NOT_A_MACRO (x,y);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+inline T maxi(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(max);
+ return max EIGEN_NOT_A_MACRO (x,y);
+}
template<typename Scalar>
EIGEN_DEVICE_FUNC
@@ -664,9 +661,9 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar&
template<typename Scalar>
EIGEN_DEVICE_FUNC
-inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y)
+inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
{
- return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y);
+ return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
}
template<typename Scalar>
@@ -694,6 +691,21 @@ bool (isfinite)(const std::complex<T>& x)
return isfinite(real(x)) && isfinite(imag(x));
}
+// Log base 2 for 32-bit positive integers.
+// Conveniently returns 0 for x==0.
+inline int log2(int x)
+{
+ eigen_assert(x>=0);
+ unsigned int v(x);
+ static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return table[(v * 0x07C4ACDDU) >> 27];
+}
+
} // end namespace numext
namespace internal {
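
Two of the additions above are worth a worked check. The log1p fallback computes x * (log(1+x) / ((1+x) - 1)), the classic trick that cancels the rounding error incurred when forming 1+x. The new numext::log2 first smears the top bit down, leaving v = 2^(floor(log2 x)+1) - 1, then a de Bruijn-style multiply by 0x07C4ACDD places a unique 5-bit index in the top bits for the 32-entry table lookup. A few spot checks, hand-verified against the table:

    #include <cassert>
    #include <Eigen/Core>

    int main()
    {
      assert(Eigen::numext::log2(0) == 0);      // documented convenience case
      assert(Eigen::numext::log2(1) == 0);
      assert(Eigen::numext::log2(3) == 1);      // floor(log2 3) == 1
      assert(Eigen::numext::log2(1024) == 10);
      return 0;
    }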
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 8c95ee3ca..0b3d90786 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -115,7 +115,8 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
- CoeffReadCost = NumTraits<Scalar>::ReadCost,
+ // FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
+ EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
@@ -213,7 +214,7 @@ class Matrix
// FIXME is it still needed
EIGEN_DEVICE_FUNC
- Matrix(internal::constructor_without_unaligned_array_assert)
+ explicit Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
@@ -359,15 +360,6 @@ class Matrix
*this = other;
}
- /** \internal
- * \brief Override MatrixBase::swap() since for dynamic-sized matrices
- * of same type it is enough to swap the data pointers.
- */
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- void swap(MatrixBase<OtherDerived> const & other)
- { this->_swap(other.derived()); }
-
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 3cb5e04fd..86994cb36 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -66,8 +66,7 @@ template<typename Derived> class MatrixBase
using Base::MaxSizeAtCompileTime;
using Base::IsVectorAtCompileTime;
using Base::Flags;
- using Base::CoeffReadCost;
-
+
using Base::derived;
using Base::const_cast_derived;
using Base::rows;
@@ -181,26 +180,20 @@ template<typename Derived> class MatrixBase
#ifdef __CUDACC__
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- const typename LazyProductReturnType<Derived,OtherDerived>::Type
+ const Product<Derived,OtherDerived,LazyProduct>
operator*(const MatrixBase<OtherDerived> &other) const
{ return this->lazyProduct(other); }
#else
-#ifdef EIGEN_TEST_EVALUATORS
template<typename OtherDerived>
const Product<Derived,OtherDerived>
operator*(const MatrixBase<OtherDerived> &other) const;
-#else
- template<typename OtherDerived>
- const typename ProductReturnType<Derived,OtherDerived>::Type
- operator*(const MatrixBase<OtherDerived> &other) const;
-#endif
#endif
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- const typename LazyProductReturnType<Derived,OtherDerived>::Type
+ const Product<Derived,OtherDerived,LazyProduct>
lazyProduct(const MatrixBase<OtherDerived> &other) const;
template<typename OtherDerived>
@@ -214,7 +207,7 @@ template<typename Derived> class MatrixBase
template<typename DiagonalDerived>
EIGEN_DEVICE_FUNC
- const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+ const Product<Derived, DiagonalDerived, LazyProduct>
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
template<typename OtherDerived>
@@ -251,19 +244,14 @@ template<typename Derived> class MatrixBase
template<int Index>
EIGEN_DEVICE_FUNC
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+
+ typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
+ typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
- // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
- // On the other hand they confuse MSVC8...
- #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
- typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
- typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
- #else
EIGEN_DEVICE_FUNC
- typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
-
+ DiagonalDynamicIndexReturnType diagonal(Index index);
EIGEN_DEVICE_FUNC
- typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
- #endif
+ ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
@@ -334,10 +322,12 @@ template<typename Derived> class MatrixBase
NoAlias<Derived,Eigen::MatrixBase > noalias();
- inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
- inline ForceAlignedAccess<Derived> forceAlignedAccess();
- template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
- template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+ // TODO forceAlignedAccess is temporarily disabled
+ // Need to find a nicer workaround.
+ inline const Derived& forceAlignedAccess() const { return derived(); }
+ inline Derived& forceAlignedAccess() { return derived(); }
+ template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
+ template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
Scalar trace() const;
@@ -348,10 +338,10 @@ template<typename Derived> class MatrixBase
/** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return derived(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
/** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
* \sa ArrayBase::matrix() */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return derived(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
/////////// LU module ///////////
@@ -361,7 +351,8 @@ template<typename Derived> class MatrixBase
const PartialPivLU<PlainObject> lu() const;
EIGEN_DEVICE_FUNC
- const internal::inverse_impl<Derived> inverse() const;
+ const Inverse<Derived> inverse() const;
+
template<typename ResultType>
void computeInverseAndDetWithCheck(
ResultType& inverse,
@@ -394,6 +385,7 @@ template<typename Derived> class MatrixBase
/////////// SVD module ///////////
JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+ BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const;
/////////// Geometry module ///////////
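Taken together, the MatrixBase changes make operator*, lazyProduct, diagonal products, and inverse() return lightweight expression types (Product<>, Inverse<>) whose work is deferred to the new evaluators. A rough sketch of what this means at the call site, assuming ordinary dense types:

    #include <Eigen/Dense>
    using Eigen::MatrixXd;

    void call_site(const MatrixXd& A, const MatrixXd& B, MatrixXd& C)
    {
      C = A * B;                  // builds a Product<> expression; evaluated on assignment
      C.noalias() = A * B;        // same, minus the aliasing-safety temporary
      MatrixXd Ai = A.inverse();  // Inverse<> expression, likewise evaluated lazily
      MatrixXd D  = A.array().square().matrix();  // array() now constructs the wrapper explicitly
    }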
diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h
index a893b1761..9aeaf8d18 100644
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -40,29 +40,29 @@ template<typename ExpressionType> class NestByValue
typedef typename internal::dense_xpr_base<NestByValue>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
- inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
+ EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
- inline Index rows() const { return m_expression.rows(); }
- inline Index cols() const { return m_expression.cols(); }
- inline Index outerStride() const { return m_expression.outerStride(); }
- inline Index innerStride() const { return m_expression.innerStride(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
- inline const CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
{
return m_expression.coeff(row, col);
}
- inline Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_expression.const_cast_derived().coeffRef(row, col);
}
- inline const CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
- inline Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_expression.const_cast_derived().coeffRef(index);
}
@@ -91,7 +91,7 @@ template<typename ExpressionType> class NestByValue
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
}
- operator const ExpressionType&() const { return m_expression; }
+ EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
protected:
const ExpressionType m_expression;
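The NestByValue change is mostly mechanical: every accessor gains EIGEN_DEVICE_FUNC so the wrapper stays usable inside CUDA kernels, and the constructor becomes explicit. As a reminder of what the wrapper is for, a small sketch (assuming the usual DenseBase::nestByValue() member):

    #include <Eigen/Dense>
    using Eigen::MatrixXd;

    MatrixXd keep_temporary_alive(const MatrixXd& A, const MatrixXd& B)
    {
      // nestByValue() stores (A + B) by value inside the enclosing expression,
      // so the temporary cannot dangle while the product is being evaluated.
      return (A + B).nestByValue() * 2.0;
    }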
diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h
index 0a1c32743..0ade75255 100644
--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -30,68 +30,35 @@ namespace Eigen {
template<typename ExpressionType, template <typename> class StorageBase>
class NoAlias
{
- typedef typename ExpressionType::Scalar Scalar;
public:
- NoAlias(ExpressionType& expression) : m_expression(expression) {}
-
- /** Behaves like MatrixBase::lazyAssign(other)
- * \sa MatrixBase::lazyAssign() */
+ typedef typename ExpressionType::Scalar Scalar;
+
+ explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
+
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
- { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
-
- /** \sa MatrixBase::operator+= */
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>());
+ return m_expression;
+ }
+
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{
- typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
- SelfAdder tmp(m_expression);
- typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
- typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
- internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
+ call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>());
return m_expression;
}
-
- /** \sa MatrixBase::operator-= */
+
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{
- typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
- SelfAdder tmp(m_expression);
- typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
- typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
- internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
+ call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>());
return m_expression;
}
-#ifndef EIGEN_PARSED_BY_DOXYGEN
- template<typename ProductDerived, typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
- { other.derived().addTo(m_expression); return m_expression; }
-
- template<typename ProductDerived, typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
- { other.derived().subTo(m_expression); return m_expression; }
-
- template<typename Lhs, typename Rhs, int NestingFlags>
- EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
- { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
-
- template<typename Lhs, typename Rhs, int NestingFlags>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
- { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
-
- template<typename OtherDerived>
- ExpressionType& operator=(const ReturnByValue<OtherDerived>& func)
- { return m_expression = func; }
-#endif
-
EIGEN_DEVICE_FUNC
ExpressionType& expression() const
{
@@ -133,7 +100,7 @@ class NoAlias
template<typename Derived>
NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
{
- return derived();
+ return NoAlias<Derived, Eigen::MatrixBase >(derived());
}
} // end namespace Eigen
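The rewritten NoAlias is now a thin shim: all three compound operators funnel into internal::call_assignment_no_alias with the matching functor, and the product-specific overloads disappear because the evaluators handle products uniformly. In user code (a sketch with plain dense matrices):

    #include <Eigen/Dense>
    using Eigen::MatrixXd;

    void accumulate(MatrixXd& C, const MatrixXd& A, const MatrixXd& B)
    {
      C.noalias()  = A * B;  // call_assignment_no_alias(C, A*B, assign_op<Scalar>())
      C.noalias() += A * B;  // call_assignment_no_alias(C, A*B, add_assign_op<Scalar>())
      C.noalias() -= A * B;  // call_assignment_no_alias(C, A*B, sub_assign_op<Scalar>())
    }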
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 8aa4c8bc5..4846f2ae1 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -13,7 +13,8 @@
namespace Eigen {
-template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
+// TODO: this does not seem to be needed at all:
+// template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
/** \class PermutationBase
* \ingroup Core_Module
@@ -60,7 +61,6 @@ class PermutationBase : public EigenBase<Derived>
typedef typename Traits::IndicesType IndicesType;
enum {
Flags = Traits::Flags,
- CoeffReadCost = Traits::CoeffReadCost,
RowsAtCompileTime = Traits::RowsAtCompileTime,
ColsAtCompileTime = Traits::ColsAtCompileTime,
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
@@ -73,6 +73,7 @@ class PermutationBase : public EigenBase<Derived>
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndexType>
PlainPermutationType;
using Base::derived;
+ typedef Transpose<PermutationBase> TransposeReturnType;
#endif
/** Copies the other permutation into *this */
@@ -118,7 +119,7 @@ class PermutationBase : public EigenBase<Derived>
void evalTo(MatrixBase<DenseDerived>& other) const
{
other.setZero();
- for (int i=0; i<rows();++i)
+ for (Index i=0; i<rows(); ++i)
other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
}
#endif
@@ -163,10 +164,10 @@ class PermutationBase : public EigenBase<Derived>
*
* \returns a reference to *this.
*
- * \warning This is much slower than applyTranspositionOnTheRight(int,int):
+ * \warning This is much slower than applyTranspositionOnTheRight(Index,Index):
* this has linear complexity and requires a lot of branching.
*
- * \sa applyTranspositionOnTheRight(int,int)
+ * \sa applyTranspositionOnTheRight(Index,Index)
*/
Derived& applyTranspositionOnTheLeft(Index i, Index j)
{
@@ -185,7 +186,7 @@ class PermutationBase : public EigenBase<Derived>
*
* This is a fast operation, it only consists in swapping two indices.
*
- * \sa applyTranspositionOnTheLeft(int,int)
+ * \sa applyTranspositionOnTheLeft(Index,Index)
*/
Derived& applyTranspositionOnTheRight(Index i, Index j)
{
@@ -198,14 +199,14 @@ class PermutationBase : public EigenBase<Derived>
*
* \note \note_try_to_help_rvo
*/
- inline Transpose<PermutationBase> inverse() const
- { return derived(); }
+ inline TransposeReturnType inverse() const
+ { return TransposeReturnType(derived()); }
/** \returns the transpose permutation matrix.
*
* \note \note_try_to_help_rvo
*/
- inline Transpose<PermutationBase> transpose() const
- { return derived(); }
+ inline TransposeReturnType transpose() const
+ { return TransposeReturnType(derived()); }
/**** multiplication helpers to hopefully get RVO ****/
@@ -215,13 +216,13 @@ class PermutationBase : public EigenBase<Derived>
template<typename OtherDerived>
void assignTranspose(const PermutationBase<OtherDerived>& other)
{
- for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
+ for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
}
template<typename Lhs,typename Rhs>
void assignProduct(const Lhs& lhs, const Rhs& rhs)
{
eigen_assert(lhs.cols() == rhs.rows());
- for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
+ for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
}
#endif
@@ -274,6 +275,7 @@ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
+ typedef PermutationStorage StorageKind;
typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef typename IndicesType::Index Index;
typedef _StorageIndexType StorageIndexType;
@@ -287,6 +289,8 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
typedef internal::traits<PermutationMatrix> Traits;
public:
+ typedef const PermutationMatrix& Nested;
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
typedef typename Traits::IndicesType IndicesType;
typedef typename Traits::StorageIndexType StorageIndexType;
@@ -298,8 +302,10 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
/** Constructs an uninitialized permutation matrix of given size.
*/
- inline PermutationMatrix(Index size) : m_indices(size)
- {}
+ explicit inline PermutationMatrix(Index size) : m_indices(size)
+ {
+ eigen_internal_assert(size <= NumTraits<StorageIndexType>::highest());
+ }
/** Copy constructor. */
template<typename OtherDerived>
@@ -370,7 +376,10 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
: m_indices(other.nestedPermutation().size())
{
- for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
+ eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndexType>::highest());
+ StorageIndexType end = StorageIndexType(m_indices.size());
+ for (StorageIndexType i=0; i<end;++i)
+ m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
}
template<typename Lhs,typename Rhs>
PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
@@ -391,6 +400,7 @@ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
: traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
{
+ typedef PermutationStorage StorageKind;
typedef Map<const Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
typedef typename IndicesType::Index Index;
typedef _StorageIndexType StorageIndexType;
@@ -462,8 +472,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
* \sa class PermutationBase, class PermutationMatrix
*/
-struct PermutationStorage {};
-
template<typename _IndicesType> class TranspositionsWrapper;
namespace internal {
template<typename _IndicesType>
@@ -477,10 +485,9 @@ struct traits<PermutationWrapper<_IndicesType> >
enum {
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
- MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime,
- Flags = 0,
- CoeffReadCost = _IndicesType::CoeffReadCost
+ MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
+ MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
+ Flags = 0
};
};
}
@@ -509,35 +516,39 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
typename IndicesType::Nested m_indices;
};
+
+// TODO: Do we need to define these operator* functions? Would it be better to have them inherited
+// from MatrixBase?
+
/** \returns the matrix with the permutation applied to the columns.
*/
-template<typename Derived, typename PermutationDerived>
-inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
-operator*(const MatrixBase<Derived>& matrix,
- const PermutationBase<PermutationDerived> &permutation)
+template<typename MatrixDerived, typename PermutationDerived>
+EIGEN_DEVICE_FUNC
+const Product<MatrixDerived, PermutationDerived, DefaultProduct>
+operator*(const MatrixBase<MatrixDerived> &matrix,
+ const PermutationBase<PermutationDerived>& permutation)
{
- return internal::permut_matrix_product_retval
- <PermutationDerived, Derived, OnTheRight>
- (permutation.derived(), matrix.derived());
+ return Product<MatrixDerived, PermutationDerived, DefaultProduct>
+ (matrix.derived(), permutation.derived());
}
/** \returns the matrix with the permutation applied to the rows.
*/
-template<typename Derived, typename PermutationDerived>
-inline const internal::permut_matrix_product_retval
- <PermutationDerived, Derived, OnTheLeft>
+template<typename PermutationDerived, typename MatrixDerived>
+EIGEN_DEVICE_FUNC
+const Product<PermutationDerived, MatrixDerived, DefaultProduct>
operator*(const PermutationBase<PermutationDerived> &permutation,
- const MatrixBase<Derived>& matrix)
+ const MatrixBase<MatrixDerived>& matrix)
{
- return internal::permut_matrix_product_retval
- <PermutationDerived, Derived, OnTheLeft>
- (permutation.derived(), matrix.derived());
+ return Product<PermutationDerived, MatrixDerived, DefaultProduct>
+ (permutation.derived(), matrix.derived());
}
namespace internal {
template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+ : traits<typename MatrixType::PlainObject>
{
typedef typename MatrixType::PlainObject ReturnType;
};
@@ -590,7 +601,7 @@ struct permut_matrix_product_retval
}
else
{
- for(int i = 0; i < n; ++i)
+ for(Index i = 0; i < n; ++i)
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
(dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
@@ -617,6 +628,8 @@ struct traits<Transpose<PermutationBase<Derived> > >
} // end namespace internal
+// TODO: the specificities should be handled by the evaluator,
+// at the very least we should only specialize TransposeImpl
template<typename Derived>
class Transpose<PermutationBase<Derived> >
: public EigenBase<Transpose<PermutationBase<Derived> > >
@@ -631,26 +644,26 @@ class Transpose<PermutationBase<Derived> >
typedef typename Derived::DenseMatrixType DenseMatrixType;
enum {
Flags = Traits::Flags,
- CoeffReadCost = Traits::CoeffReadCost,
RowsAtCompileTime = Traits::RowsAtCompileTime,
ColsAtCompileTime = Traits::ColsAtCompileTime,
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
};
typedef typename Traits::Scalar Scalar;
+ typedef typename Traits::Index Index;
#endif
Transpose(const PermutationType& p) : m_permutation(p) {}
- inline int rows() const { return m_permutation.rows(); }
- inline int cols() const { return m_permutation.cols(); }
+ inline Index rows() const { return m_permutation.rows(); }
+ inline Index cols() const { return m_permutation.cols(); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
void evalTo(MatrixBase<DenseDerived>& other) const
{
other.setZero();
- for (int i=0; i<rows();++i)
+ for (Index i=0; i<rows();++i)
other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
}
#endif
@@ -663,19 +676,19 @@ class Transpose<PermutationBase<Derived> >
/** \returns the matrix with the inverse permutation applied to the columns.
*/
template<typename OtherDerived> friend
- inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
+ const Product<OtherDerived, Transpose, DefaultProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
{
- return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+ return Product<OtherDerived, Transpose, DefaultProduct>(matrix.derived(), trPerm.derived());
}
/** \returns the matrix with the inverse permutation applied to the rows.
*/
template<typename OtherDerived>
- inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
+ const Product<Transpose, OtherDerived, DefaultProduct>
operator*(const MatrixBase<OtherDerived>& matrix) const
{
- return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
+ return Product<Transpose, OtherDerived, DefaultProduct>(*this, matrix.derived());
}
const PermutationType& nestedPermutation() const { return m_permutation; }
@@ -690,6 +703,38 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con
return derived();
}
+namespace internal {
+
+// TODO currently a permutation matrix expression has the form PermutationMatrix or PermutationWrapper
+// or their transpose; in the future shape should be defined by the expression traits
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+struct evaluator_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
+{
+ typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
+ typedef PermutationShape Shape;
+ static const int AssumeAliasing = 0;
+};
+
+template<typename IndicesType>
+struct evaluator_traits<PermutationWrapper<IndicesType> >
+{
+ typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
+ typedef PermutationShape Shape;
+ static const int AssumeAliasing = 0;
+};
+
+template<typename Derived>
+struct evaluator_traits<Transpose<PermutationBase<Derived> > >
+{
+ typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
+ typedef PermutationShape Shape;
+ static const int AssumeAliasing = 0;
+};
+
+template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_PERMUTATIONMATRIX_H
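With the PermutationShape evaluator traits in place, permutation products go through the generic Product<> machinery like everything else, and inverse()/transpose() return an explicit TransposeReturnType. A usage sketch, assuming the standard permutation API:

    #include <Eigen/Dense>
    using namespace Eigen;

    void permute_rows(const MatrixXd& M)
    {
      PermutationMatrix<Dynamic, Dynamic> P(M.rows());
      P.setIdentity();
      P.applyTranspositionOnTheRight(0, M.rows() - 1);  // cheap: swaps two indices
      MatrixXd PM   = P * M;             // Product<> with a PermutationShape operand
      MatrixXd back = P.inverse() * PM;  // inverse() is a Transpose<> expression, no copy
    }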
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 69f34bd3e..06e326a05 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -128,7 +128,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
- enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
+ enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
EIGEN_DEVICE_FUNC
@@ -221,11 +221,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
}
/** \returns a const pointer to the data array of this matrix */
- EIGEN_STRONG_INLINE const Scalar *data() const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
{ return m_storage.data(); }
/** \returns a pointer to the data array of this matrix */
- EIGEN_STRONG_INLINE Scalar *data()
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
{ return m_storage.data(); }
/** Resizes \c *this to a \a rows x \a cols matrix.
@@ -457,7 +457,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
// FIXME is it still needed ?
/** \internal */
EIGEN_DEVICE_FUNC
- PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+ explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
@@ -639,22 +639,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
*
* \internal
*/
+ // aliasing is dealt with once in internal::call_assignment
+ // so at this stage we have to assume aliasing... and resizing has to be done later.
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
{
- _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
+ internal::call_assignment(this->derived(), other.derived());
return this->derived();
}
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
-
/** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
* is the case when creating a new matrix) so one can enforce lazy evaluation.
*
@@ -669,7 +663,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
//_resize_to_match(other);
// the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
// it wouldn't allow to copy a row-vector into a column-vector.
- return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
+ internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>());
+ return this->derived();
}
template<typename T0, typename T1>
@@ -704,9 +699,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
m_storage.data()[1] = Scalar(val1);
}
+ // If the argument is convertible to the Index type and we either have a non-1x1 Matrix or a dynamic-sized Array,
+ // then the argument is meant to be the size of the object.
template<typename T>
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0)
+ EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
+ && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
{
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
const bool is_integer = NumTraits<T>::IsInteger;
@@ -714,6 +712,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
resize(size);
}
+
+ // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where the scalar type can be implicitly converted)
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
@@ -722,6 +722,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
m_storage.data()[0] = val0;
}
+ // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where the scalar type matches the index type)
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Index& val0,
@@ -734,18 +735,21 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
m_storage.data()[0] = Scalar(val0);
}
+ // Initialize a fixed size matrix from a pointer to raw data
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const Scalar* data){
this->_set_noalias(ConstMapType(data));
}
+ // Initialize an arbitrary matrix from a dense expression
template<typename T, typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
this->_set_noalias(other);
}
+ // Initialize an arbitrary matrix from a generic Eigen expression
template<typename T, typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
@@ -766,23 +770,58 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
this->derived() = r;
}
-
+
+ // For fixed-size arrays:
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
+ typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic
+ && Base::SizeAtCompileTime!=1
+ && internal::is_convertible<T, Scalar>::value
+ && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
+ {
+ Base::setConstant(val0);
+ }
+
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Index& val0,
+ typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
+ && (internal::is_same<Index,T>::value)
+ && Base::SizeAtCompileTime!=Dynamic
+ && Base::SizeAtCompileTime!=1
+ && internal::is_convertible<T, Scalar>::value
+ && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
+ {
+ Base::setConstant(val0);
+ }
+
template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
friend struct internal::matrix_swap_impl;
- /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the
- * data pointers.
+ public:
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** \internal
+ * \brief Override DenseBase::swap() since for dynamic-sized matrices
+ * of same type it is enough to swap the data pointers.
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- void _swap(DenseBase<OtherDerived> const & other)
+ void swap(DenseBase<OtherDerived> & other)
{
enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
- internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
+ internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
}
-
- public:
-#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+ /** \internal
+ * \brief const version forwarded to DenseBase::swap
+ */
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(DenseBase<OtherDerived> const & other)
+ { Base::swap(other.derived()); }
+
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void _check_template_params()
{
@@ -797,10 +836,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
&& (Options & (DontAlign|RowMajor)) == Options),
INVALID_MATRIX_TEMPLATE_PARAMETERS)
}
-#endif
-private:
- enum { ThisConstantIsPrivateInPlainObjectBase };
+ enum { IsPlainObjectBase = 1 };
+#endif
};
namespace internal {
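Two user-visible consequences of the PlainObjectBase changes are worth spelling out: swap() now lives here directly (swapping bare data pointers when both sides are the same dynamic-sized type), and the extra _init1 overloads let a fixed-size array be constructed from a single value. A sketch under those assumptions:

    #include <Eigen/Dense>
    using namespace Eigen;

    void construction_and_swap()
    {
      VectorXd v(10);    // dynamic size: the single argument is a size
      Array3f  a(2.0f);  // fixed-size array: the new overload calls setConstant(2.0f)
      MatrixXd m1(3, 3), m2(5, 5);
      m1.swap(m2);       // same dynamic type: only the data pointers are exchanged
    }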
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 5d3789be7..cb79543ef 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -12,8 +12,7 @@
namespace Eigen {
-template<typename Lhs, typename Rhs> class Product;
-template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
+template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
/** \class Product
* \ingroup Core_Module
@@ -24,53 +23,108 @@ template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
* \param Rhs the type of the right-hand side expression
*
* This class represents an expression of the product of two arbitrary matrices.
+ *
+ * The other template parameters are:
+ * \tparam Option can be DefaultProduct or LazyProduct
*
*/
-// Use ProductReturnType to get correct traits, in particular vectorization flags
+
namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<Product<Lhs, Rhs> >
- : traits<typename ProductReturnType<Lhs, Rhs>::Type>
-{
- // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
- // TODO: This flag should eventually go in a separate evaluator traits class
+
+// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
+// Rhs::Scalar, but products with permutation matrices inherit the scalar of the other factor.
+template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<Rhs>::Shape >
+struct product_result_scalar
+{
+ typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
+};
+
+template<typename Lhs, typename Rhs, typename RhsShape>
+struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape>
+{
+ typedef typename Rhs::Scalar Scalar;
+};
+
+template<typename Lhs, typename Rhs, typename LhsShape>
+ struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape>
+{
+ typedef typename Lhs::Scalar Scalar;
+};
+
+template<typename Lhs, typename Rhs, int Option>
+struct traits<Product<Lhs, Rhs, Option> >
+{
+ typedef typename remove_all<Lhs>::type LhsCleaned;
+ typedef typename remove_all<Rhs>::type RhsCleaned;
+ typedef traits<LhsCleaned> LhsTraits;
+ typedef traits<RhsCleaned> RhsTraits;
+
+ typedef MatrixXpr XprKind;
+
+ typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar;
+ typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
+ typename RhsTraits::StorageKind,
+ internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
+ typedef typename promote_index_type<typename LhsTraits::Index,
+ typename RhsTraits::Index>::type Index;
+
enum {
- Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
+ RowsAtCompileTime = LhsTraits::RowsAtCompileTime,
+ ColsAtCompileTime = RhsTraits::ColsAtCompileTime,
+ MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
+
+ // FIXME: only needed by GeneralMatrixMatrixTriangular
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
+
+ // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
+ Flags = ( (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1)
+ || ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
+ || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) )
+ ? RowMajorBit : (MaxColsAtCompileTime==1 ? 0 : NoPreferredStorageOrderBit)
};
};
+
} // end namespace internal
-template<typename Lhs, typename Rhs>
-class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
- typename internal::traits<Rhs>::StorageKind>::ret>
+template<typename _Lhs, typename _Rhs, int Option>
+class Product : public ProductImpl<_Lhs,_Rhs,Option,
+ typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
+ typename internal::traits<_Rhs>::StorageKind,
+ internal::product_type<_Lhs,_Rhs>::ret>::ret>
{
public:
+ typedef _Lhs Lhs;
+ typedef _Rhs Rhs;
+
typedef typename ProductImpl<
- Lhs, Rhs,
- typename internal::promote_storage_type<typename Lhs::StorageKind,
- typename Rhs::StorageKind>::ret>::Base Base;
+ Lhs, Rhs, Option,
+ typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind,
+ internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
- typedef typename Lhs::Nested LhsNested;
- typedef typename Rhs::Nested RhsNested;
+ typedef typename internal::nested<Lhs>::type LhsNested;
+ typedef typename internal::nested<Rhs>::type RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
- Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+ EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
{
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
- inline Index rows() const { return m_lhs.rows(); }
- inline Index cols() const { return m_rhs.cols(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
- const LhsNestedCleaned& lhs() const { return m_lhs; }
- const RhsNestedCleaned& rhs() const { return m_rhs; }
+ EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
+ EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
protected:
@@ -78,14 +132,77 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty
RhsNested m_rhs;
};
-template<typename Lhs, typename Rhs>
-class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
+namespace internal {
+
+template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
+class dense_product_base
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{};
+
+/** Conversion to scalar for inner products */
+template<typename Lhs, typename Rhs, int Option>
+class dense_product_base<Lhs, Rhs, Option, InnerProduct>
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{
+ typedef Product<Lhs,Rhs,Option> ProductXpr;
+ typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
+public:
+ using Base::derived;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+
+ operator const Scalar() const
+ {
+ return typename internal::evaluator<ProductXpr>::type(derived()).coeff(0,0);
+ }
+};
+
+} // namespace internal
+
+// Generic API dispatcher
+template<typename Lhs, typename Rhs, int Option, typename StorageKind>
+class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
{
- typedef Product<Lhs, Rhs> Derived;
public:
+ typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
+};
- typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
+template<typename Lhs, typename Rhs, int Option>
+class ProductImpl<Lhs,Rhs,Option,Dense>
+ : public internal::dense_product_base<Lhs,Rhs,Option>
+{
+ typedef Product<Lhs, Rhs, Option> Derived;
+
+ public:
+
+ typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+ protected:
+ enum {
+ IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) &&
+ (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
+ EnableCoeff = IsOneByOne || Option==LazyProduct
+ };
+
+ public:
+
+ EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+ {
+ EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+ eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+
+ return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
+ }
+
+ EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+ {
+ EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+ eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+
+ return typename internal::evaluator<Derived>::type(derived()).coeff(i);
+ }
+
+
};
/***************************************************************************
@@ -102,6 +219,15 @@ prod(const Lhs& lhs, const Rhs& rhs)
return Product<Lhs,Rhs>(lhs,rhs);
}
+/** \internal used to test the evaluator only
+ */
+template<typename Lhs,typename Rhs>
+const Product<Lhs,Rhs,LazyProduct>
+lazyprod(const Lhs& lhs, const Rhs& rhs)
+{
+ return Product<Lhs,Rhs,LazyProduct>(lhs,rhs);
+}
+
} // end namespace Eigen
#endif // EIGEN_PRODUCT_H
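The reworked Product hierarchy keeps coefficient access honest: dense_product_base adds an implicit conversion to Scalar for the inner-product case, while ProductImpl::coeff() is statically restricted to lazy or (potentially) 1x1 products. A sketch with fixed-size types, where the restriction is visible at compile time:

    #include <Eigen/Dense>
    using namespace Eigen;

    void product_access(const Vector3d& u, const Vector3d& v,
                        const Matrix3d& A, const Matrix3d& B)
    {
      double d = u.transpose() * v;               // InnerProduct: converts straight to Scalar
      Matrix3d C = A * B;                         // DefaultProduct, evaluated on assignment
      double c01 = A.lazyProduct(B).coeff(0, 1);  // LazyProduct: per-coefficient access is fine
      // (A * B).coeff(0, 1) would hit the static assert
      // THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS here.
    }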
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 483914a9b..050343b2d 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -12,253 +12,6 @@
namespace Eigen {
-/** \class ProductBase
- * \ingroup Core_Module
- *
- */
-
-namespace internal {
-template<typename Derived, typename _Lhs, typename _Rhs>
-struct traits<ProductBase<Derived,_Lhs,_Rhs> >
-{
- typedef MatrixXpr XprKind;
- typedef typename remove_all<_Lhs>::type Lhs;
- typedef typename remove_all<_Rhs>::type Rhs;
- typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
- typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
- typename traits<Rhs>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<Lhs>::Index,
- typename traits<Rhs>::Index>::type Index;
- enum {
- RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
- ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
- MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
- Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
- | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
- // Note that EvalBeforeNestingBit and NestByRefBit
- // are not used in practice because nested is overloaded for products
- CoeffReadCost = 0 // FIXME why is it needed ?
- };
-};
-}
-
-#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
- typedef ProductBase<Derived, Lhs, Rhs > Base; \
- EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
- typedef typename Base::LhsNested LhsNested; \
- typedef typename Base::_LhsNested _LhsNested; \
- typedef typename Base::LhsBlasTraits LhsBlasTraits; \
- typedef typename Base::ActualLhsType ActualLhsType; \
- typedef typename Base::_ActualLhsType _ActualLhsType; \
- typedef typename Base::RhsNested RhsNested; \
- typedef typename Base::_RhsNested _RhsNested; \
- typedef typename Base::RhsBlasTraits RhsBlasTraits; \
- typedef typename Base::ActualRhsType ActualRhsType; \
- typedef typename Base::_ActualRhsType _ActualRhsType; \
- using Base::m_lhs; \
- using Base::m_rhs;
-
-template<typename Derived, typename Lhs, typename Rhs>
-class ProductBase : public MatrixBase<Derived>
-{
- public:
- typedef MatrixBase<Derived> Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
-
- typedef typename Lhs::Nested LhsNested;
- typedef typename internal::remove_all<LhsNested>::type _LhsNested;
- typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
- typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
- typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
- typedef typename internal::traits<Lhs>::Scalar LhsScalar;
-
- typedef typename Rhs::Nested RhsNested;
- typedef typename internal::remove_all<RhsNested>::type _RhsNested;
- typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
- typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
- typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
- typedef typename internal::traits<Rhs>::Scalar RhsScalar;
-
- // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
- typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
-
- public:
-
- typedef typename Base::PlainObject PlainObject;
-
- ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
- : m_lhs(a_lhs), m_rhs(a_rhs)
- {
- eigen_assert(a_lhs.cols() == a_rhs.rows()
- && "invalid matrix product"
- && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
- }
-
- inline Index rows() const { return m_lhs.rows(); }
- inline Index cols() const { return m_rhs.cols(); }
-
- template<typename Dest>
- inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
-
- template<typename Dest>
- inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
-
- template<typename Dest>
- inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
-
- template<typename Dest>
- inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
-
- const _LhsNested& lhs() const { return m_lhs; }
- const _RhsNested& rhs() const { return m_rhs; }
-
- // Implicit conversion to the nested type (trigger the evaluation of the product)
- operator const PlainObject& () const
- {
- m_result.resize(m_lhs.rows(), m_rhs.cols());
- derived().evalTo(m_result);
- return m_result;
- }
-
- const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
- { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
-
- template<int Index>
- const Diagonal<FullyLazyCoeffBaseProductType,Index> diagonal() const
- { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
-
- const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
- { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
-
- // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isn't an Lvalue expression
- typename Base::CoeffReturnType coeff(Index row, Index col) const
- {
- EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
- eigen_assert(this->rows() == 1 && this->cols() == 1);
- Matrix<Scalar,1,1> result = *this;
- return result.coeff(row,col);
- }
-
- typename Base::CoeffReturnType coeff(Index i) const
- {
- EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
- eigen_assert(this->rows() == 1 && this->cols() == 1);
- Matrix<Scalar,1,1> result = *this;
- return result.coeff(i);
- }
-
- const Scalar& coeffRef(Index row, Index col) const
- {
- EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
- eigen_assert(this->rows() == 1 && this->cols() == 1);
- return derived().coeffRef(row,col);
- }
-
- const Scalar& coeffRef(Index i) const
- {
- EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
- eigen_assert(this->rows() == 1 && this->cols() == 1);
- return derived().coeffRef(i);
- }
-
- protected:
-
- LhsNested m_lhs;
- RhsNested m_rhs;
-
- mutable PlainObject m_result;
-};
-
-// here we need to overload the nested rule for products
-// such that the nested type is a const reference to a plain matrix
-namespace internal {
-template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
-struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
-{
- typedef PlainObject const& type;
-};
-}
-
-template<typename NestedProduct>
-class ScaledProduct;
-
-// Note that these two operator* functions are not defined as member
-// functions of ProductBase, because, otherwise we would have to
-// define all overloads defined in MatrixBase. Furthermore, Using
-// "using Base::operator*" would not work with MSVC.
-//
-// Also note that here we accept any compatible scalar types
-template<typename Derived,typename Lhs,typename Rhs>
-const ScaledProduct<Derived>
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-template<typename Derived,typename Lhs,typename Rhs>
-typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
- const ScaledProduct<Derived> >::type
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::RealScalar& x)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-
-template<typename Derived,typename Lhs,typename Rhs>
-const ScaledProduct<Derived>
-operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-template<typename Derived,typename Lhs,typename Rhs>
-typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
- const ScaledProduct<Derived> >::type
-operator*(const typename Derived::RealScalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-namespace internal {
-template<typename NestedProduct>
-struct traits<ScaledProduct<NestedProduct> >
- : traits<ProductBase<ScaledProduct<NestedProduct>,
- typename NestedProduct::_LhsNested,
- typename NestedProduct::_RhsNested> >
-{
- typedef typename traits<NestedProduct>::StorageKind StorageKind;
-};
-}
-
-template<typename NestedProduct>
-class ScaledProduct
- : public ProductBase<ScaledProduct<NestedProduct>,
- typename NestedProduct::_LhsNested,
- typename NestedProduct::_RhsNested>
-{
- public:
- typedef ProductBase<ScaledProduct<NestedProduct>,
- typename NestedProduct::_LhsNested,
- typename NestedProduct::_RhsNested> Base;
- typedef typename Base::Scalar Scalar;
- typedef typename Base::PlainObject PlainObject;
-// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
-
- ScaledProduct(const NestedProduct& prod, const Scalar& x)
- : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
-
- template<typename Dest>
- inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
-
- template<typename Dest>
- inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
-
- template<typename Dest>
- inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
-
- template<typename Dest>
- inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
-
- const Scalar& alpha() const { return m_alpha; }
-
- protected:
- const NestedProduct& m_prod;
- Scalar m_alpha;
-};
-
/** \internal
* Overloaded to perform an efficient C = (A*B).lazy() */
template<typename Derived>
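With ProductBase and ScaledProduct deleted, the scalar-times-product case they used to special-case is handled by the CwiseUnaryOp evaluator specialization added in ProductEvaluators.h below, which rewrites alpha * (A * B) as (alpha * A) * B before evaluating. In user terms (a sketch):

    #include <Eigen/Dense>
    using Eigen::MatrixXd;

    void scaled_product(MatrixXd& C, const MatrixXd& A, const MatrixXd& B, double alpha)
    {
      // Formerly ScaledProduct::scaleAndAddTo; now folded into the product evaluator.
      C.noalias() = alpha * (A * B);
    }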
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 855914f2e..488eee00c 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -16,95 +16,347 @@
namespace Eigen {
namespace internal {
+
+/** \internal
+ * Evaluator of a product expression.
+ * Since products require special treatment to handle all possible cases,
+ * we simply defer the evaluation logic to a product_evaluator class
+ * which offers more partial specialization possibilities.
+ *
+ * \sa class product_evaluator
+ */
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<Product<Lhs, Rhs, Options> >
+ : public product_evaluator<Product<Lhs, Rhs, Options> >
+{
+ typedef Product<Lhs, Rhs, Options> XprType;
+ typedef product_evaluator<XprType> Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
-// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
-// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic
-// in ProductReturnType to decide.
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+
+// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
+// TODO we should apply that rule only if that's really helpful
+template<typename Lhs, typename Rhs, typename Scalar>
+struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
+ : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
+{
+ typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
+ typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
+ {}
+};
-template<typename XprType, typename ProductType>
-struct product_evaluator_dispatcher;
+
+template<typename Lhs, typename Rhs, int DiagIndex>
+struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
+ : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
+{
+ typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
+ typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
+ Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
+ xpr.index() ))
+ {}
+};
+
+
+// Helper class to perform a matrix product with the destination at hand.
+// Depending on the sizes of the factors, there are different evaluation strategies
+// as controlled by internal::product_type.
+template< typename Lhs, typename Rhs,
+ typename LhsShape = typename evaluator_traits<Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<Rhs>::Shape,
+ int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct generic_product_impl;
template<typename Lhs, typename Rhs>
-struct evaluator_impl<Product<Lhs, Rhs> >
- : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
+struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
+ : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
{
- typedef Product<Lhs, Rhs> XprType;
- typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
+ enum { AssumeAliasing = 1 };
+};
- evaluator_impl(const XprType& xpr) : Base(xpr)
- { }
+// This is the default evaluator implementation for products:
+// It creates a temporary and calls generic_product_impl
+template<typename Lhs, typename Rhs, int ProductTag, typename LhsShape, typename RhsShape>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type
+{
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+ enum {
+ Flags = Base::Flags | EvalBeforeNestingBit
+// CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits<ProductBase>)
+ };
+
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+
+// FIXME shall we handle nested_eval here?
+// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+//
+// const LhsNested lhs(xpr.lhs());
+// const RhsNested rhs(xpr.rhs());
+//
+// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
+
+ generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+ }
+
+protected:
+ PlainObject m_result;
};
-template<typename XprType, typename ProductType>
-struct product_evaluator_traits_dispatcher;
+// Dense = Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ // FIXME shall we handle nested_eval here?
+ generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
+ }
+};
-template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs> >
- : product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
-{
- static const int AssumeAliasing = 1;
+// Dense += Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
+ {
+ // FIXME shall we handle nested_eval here?
+ generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
+ }
};
-// Case 1: Evaluate all at once
-//
-// We can view the GeneralProduct class as a part of the product evaluator.
-// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
-// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
+// Dense -= Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
+ {
+ // FIXME shall we handle nested_eval here?
+ generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
+ }
+};
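+
+// Taken together, the three Assignment specializations above map the common dense
+// statements onto the product kernels without first materializing A*B
+// (hypothetical sketch):
+//   C  = A * B;   // assign_op     -> generic_product_impl<Lhs,Rhs>::evalTo(C, A, B)
+//   C += A * B;   // add_assign_op -> generic_product_impl<Lhs,Rhs>::addTo(C, A, B)
+//   C -= A * B;   // sub_assign_op -> generic_product_impl<Lhs,Rhs>::subTo(C, A, B)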
-template<typename Lhs, typename Rhs>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
+
+// Dense ?= scalar * Product
+// TODO we should apply that rule only if that's really helpful
+// for instance, this is not good for inner products
+template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis>
+struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
+ const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar>
{
- static const int HasEvalTo = 0;
+ typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
+ const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
+ {
+ // TODO use operator* instead of prod() once we have made enough progress
+ call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func);
+ }
};
+
template<typename Lhs, typename Rhs>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
- : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
- typedef Product<Lhs, Rhs> XprType;
- typedef typename XprType::PlainObject PlainObject;
- typedef typename evaluator<PlainObject>::type evaluator_base;
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+
+ template<typename Dst>
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
+};
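+
+// For instance (hypothetical sketch), a 1x1 inner product such as
+//   VectorXf u(8), v(8);
+//   Matrix<float,1,1> d = u.transpose() * v;
+// lands in the InnerProduct case above: with lhs = u.transpose() and rhs = v, the
+// evalTo body computes d(0,0) = u.cwiseProduct(v).sum().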
+
+
+/***********************************************************************
+* Implementation of outer dense * dense vector product
+***********************************************************************/
+
+// Column major result
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+{
+ typedef typename Dst::Index Index;
+ typename evaluator<Rhs>::type rhsEval(rhs);
+ // FIXME make sure lhs is sequentially stored
+ // FIXME not very good if rhs is real and lhs complex while alpha is real too
+ // FIXME we should probably build an evaluator for dst
+ const Index cols = dst.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dst.col(j), rhsEval.coeff(0,j) * lhs);
+}
+
+// Row major result
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) {
+ typedef typename Dst::Index Index;
+ typename evaluator<Lhs>::type lhsEval(lhs);
+ // FIXME make sure rhs is sequentially stored
+ // FIXME not very good if lhs is real and rhs complex while alpha is real too
+ // FIXME we should probably build an evaluator for dst
+ const Index rows = dst.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dst.row(i), lhsEval.coeff(i,0) * rhs);
+}
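+
+// The two overloads above differ only in which side of the rank-1 update is traversed
+// coherently: a column-major destination is built one column at a time, a row-major one
+// one row at a time. For the column-major case the loop is morally (sketch):
+//   for (Index j = 0; j < dst.cols(); ++j)
+//     func(dst.col(j), rhs.coeff(0,j) * lhs); // set/add/sub/adds picked by the caller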
- // TODO: Computation is too early (?)
- product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
+{
+ template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+
+ // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ explicit adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
- m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
+ internal::outer_product_selector_run(dst, lhs, rhs, set(), IsRowMajor<Dst>());
+ }
+
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, add(), IsRowMajor<Dst>());
+ }
+
+ template<typename Dst>
+ static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, sub(), IsRowMajor<Dst>());
+ }
+
+ template<typename Dst>
+ static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), IsRowMajor<Dst>());
}
-protected:
- PlainObject m_result;
};
-// For the other three subcases, simply call the evalTo() method of GeneralProduct
-// TODO: GeneralProduct should take evaluators, not expression objects.
-template<typename Lhs, typename Rhs, int ProductType>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
+// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
+template<typename Lhs, typename Rhs, typename Derived>
+struct generic_product_impl_base
{
- static const int HasEvalTo = 1;
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+
+ template<typename Dst>
+ static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
+
+ template<typename Dst>
+ static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
+
+ template<typename Dst>
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
+
+ template<typename Dst>
+ static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
+
};
-template<typename Lhs, typename Rhs, int ProductType>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
{
- typedef Product<Lhs, Rhs> XprType;
- typedef typename XprType::PlainObject PlainObject;
- typedef typename evaluator<PlainObject>::type evaluator_base;
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+ typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
+
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ internal::gemv_dense_sense_selector<Side,
+ (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+ bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
+ >::run(lhs, rhs, dst, alpha);
+ }
+};
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
- product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
- { }
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ // TODO: use the following instead of calling call_assignment, same for the other methods
+ // dst = lazyprod(lhs,rhs);
+ call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op<Scalar>());
+ }
- template<typename DstEvaluatorType, typename DstXprType>
- void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
- dst.resize(m_xpr.rows(), m_xpr.cols());
- GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
+ // dst += lazyprod(lhs,rhs);
+ call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op<Scalar>());
}
-protected:
- const XprType& m_xpr;
+ template<typename Dst>
+ static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ // dst -= lazyprod(lhs,rhs);
+ call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op<Scalar>());
+ }
+
+// template<typename Dst>
+// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+// { dst += alpha * lazyprod(lhs,rhs); }
};
+// This specialization enforces the use of a coefficient-based evaluation strategy
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
+ : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
+
// Case 2: Evaluate coeff by coeff
//
// This is mostly taken from CoeffBasedProduct.h
@@ -117,65 +369,116 @@ struct etor_product_coeff_impl;
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl;
-template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
-{
- static const int HasEvalTo = 0;
-};
-
-template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
- : evaluator_impl_base<Product<Lhs, Rhs> >
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar >
+ : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
{
- typedef Product<Lhs, Rhs> XprType;
- typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
-
- product_evaluator_dispatcher(const XprType& xpr)
- : m_lhsImpl(xpr.lhs()),
- m_rhsImpl(xpr.rhs()),
- m_innerDim(xpr.lhs().cols())
- { }
-
+ typedef Product<Lhs, Rhs, LazyProduct> XprType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketScalar PacketScalar;
typedef typename XprType::PacketReturnType PacketReturnType;
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : m_lhs(xpr.lhs()),
+ m_rhs(xpr.rhs()),
+ m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
+ m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
+ // or perhaps declare them on the fly in the packet method... We have to experiment to check what's best.
+ m_innerDim(xpr.lhs().cols())
+ { }
+
// Everything below here is taken from CoeffBasedProduct.h
+ typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+ typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+
+ typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+ typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+
+ typedef typename evaluator<LhsNestedCleaned>::type LhsEtorType;
+ typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType;
+
enum {
- RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
+ RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
+ ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
+ MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
+
PacketSize = packet_traits<Scalar>::size,
- InnerSize = traits<CoeffBasedProductType>::InnerSize,
- CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
+
+ LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
+ RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
+ CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
+ : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
- CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
+
+ LhsFlags = LhsEtorType::Flags,
+ RhsFlags = RhsEtorType::Flags,
+
+ LhsRowMajor = LhsFlags & RowMajorBit,
+ RhsRowMajor = RhsFlags & RowMajorBit,
+
+ SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
+
+ CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
+ && (ColsAtCompileTime == Dynamic
+ || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
+ && (RhsFlags&AlignedBit)
+ )
+ ),
+
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
+ && (RowsAtCompileTime == Dynamic
+ || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
+ && (LhsFlags&AlignedBit)
+ )
+ ),
+
+ EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : (RhsRowMajor && !CanVectorizeLhs),
+
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+ | (EvalToRowMajor ? RowMajorBit : 0)
+ | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
+ | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
+ // TODO enable vectorization for mixed types
+ | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
+
+ /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+ * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+ * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+ * the Flags, it is safe to make this value depend on ActualPacketAccessBit, which doesn't affect the ABI.
+ */
+ CanVectorizeInner = SameType
+ && LhsRowMajor
+ && (!RhsRowMajor)
+ && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+ && (LhsFlags & RhsFlags & AlignedBit)
+ && (InnerSize % packet_traits<Scalar>::size == 0)
};
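+
+ // Worked instance of the cost formula above (for illustration): with float operands,
+ // InnerSize == 4, and unit-cost coefficient access, CoeffReadCost evaluates to
+ // 4*(MulCost + 1 + 1) + 3*AddCost = 4*3 + 3 = 15, given NumTraits<float>::MulCost
+ // == NumTraits<float>::AddCost == 1; that is well below EIGEN_UNROLLING_LIMIT, so
+ // Unroll is set and the packet path is fully unrolled.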
-
- typedef typename evaluator<Lhs>::type LhsEtorType;
- typedef typename evaluator<Rhs>::type RhsEtorType;
- typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
- Unroll ? InnerSize-1 : Dynamic,
- LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
-
- const CoeffReturnType coeff(Index row, Index col) const
+
+ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const
{
- Scalar res;
- CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
- return res;
+ // TODO check performance regression wrt Eigen 3.2, which has special handling of this function
+ return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
}
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
* which is why we don't set the LinearAccessBit.
+ * TODO: this seems possible when the result is a vector
*/
- const CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
{
- Scalar res;
const Index row = RowsAtCompileTime == 1 ? 0 : index;
const Index col = RowsAtCompileTime == 1 ? index : 0;
- CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
- return res;
+ // TODO check performance regression wrt Eigen 3.2, which has special handling of this function
+ return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
}
template<int LoadMode>
@@ -183,224 +486,382 @@ struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNest
{
PacketScalar res;
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
- Unroll ? InnerSize-1 : Dynamic,
- LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
+ Unroll ? InnerSize-1 : Dynamic,
+ LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
+
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
return res;
}
protected:
- typename evaluator<Lhs>::type m_lhsImpl;
- typename evaluator<Rhs>::type m_rhsImpl;
+ const LhsNested m_lhs;
+ const RhsNested m_rhs;
+
+ LhsEtorType m_lhsImpl;
+ RhsEtorType m_rhsImpl;
// TODO: Get rid of m_innerDim if known at compile time
Index m_innerDim;
};
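+
+// In this lazy mode a coefficient is computed on demand as a single dot product,
+// conceptually (A*B)(i,j) == A.row(i).dot(B.col(j)) -- which is what coeff() above
+// spells as m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col)).sum().
+// Hypothetical sketch:
+//   MatrixXf A(4,4), B(4,4);
+//   float x = A.lazyProduct(B)(1,2); // one dot product, no 4x4 temporary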
-/***************************************************************************
-* Normal product .coeff() implementation (with meta-unrolling)
-***************************************************************************/
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar >
+ : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar >
+{
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
+ typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > Base;
+ enum {
+ Flags = Base::Flags | EvalBeforeNestingBit
+ };
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
+ {}
+};
-/**************************************
-*** Scalar path - no vectorization ***
-**************************************/
+/****************************************
+*** Coeff based product, Packet path ***
+****************************************/
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
- etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
- res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
+ etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
}
};
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
- res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+ etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
}
};
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
- eigen_assert(innerDim>0 && "you are using a non initialized matrix");
- res = lhs.coeff(row, 0) * rhs.coeff(0, col);
- for(Index i = 1; i < innerDim; ++i)
- res += lhs.coeff(row, i) * rhs.coeff(i, col);
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
};
-/*******************************************
-*** Scalar path with inner vectorization ***
-*******************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
-struct etor_product_coeff_vectorized_unroller
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
- etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
- pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
};
-template<typename Lhs, typename Rhs, typename Packet>
-struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
- pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
+ eigen_assert(innerDim>0 && "you are using an uninitialized matrix");
+ res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ for(Index i = 1; i < innerDim; ++i)
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
- typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
- enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
- Packet pres;
- etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
- etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
- res = predux(pres);
+ eigen_assert(innerDim>0 && "you are using an uninitialized matrix");
+ res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ for(Index i = 1; i < innerDim; ++i)
+ res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
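+
+// The packet specializations above unroll (or loop over) the inner dimension with one
+// fused multiply-accumulate per inner index. For a row-major result, each step
+// broadcasts one lhs coefficient against a full rhs packet, morally (sketch, eliding
+// the `template` disambiguators):
+//   res = pmul (pset1<Packet>(lhs.coeff(row,0)), rhs.packet<LoadMode>(0,col));
+//   res = pmadd(pset1<Packet>(lhs.coeff(row,1)), rhs.packet<LoadMode>(1,col), res);
+//   ... // and so on up to innerDim-1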
-template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
-struct etor_product_coeff_vectorized_dyn_selector
+
+/***************************************************************************
+* Triangular products
+***************************************************************************/
+template<int Mode, bool LhsIsTriangular,
+ typename Lhs, bool LhsIsVector,
+ typename Rhs, bool RhsIsVector>
+struct triangular_product_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
- res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
+ triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
+ ::run(dst, lhs.nestedExpression(), rhs, alpha);
}
};
-// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
-// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
-template<typename Lhs, typename Rhs, int RhsCols>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
- res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
+ triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
}
};
-template<typename Lhs, typename Rhs, int LhsRows>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
+
+/***************************************************************************
+* SelfAdjoint products
+***************************************************************************/
+template <typename Lhs, int LhsMode, bool LhsIsVector,
+ typename Rhs, int RhsMode, bool RhsIsVector>
+struct selfadjoint_product_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
- res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
+ selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
}
};
-template<typename Lhs, typename Rhs>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{
- typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
- res = lhs.transpose().cwiseProduct(rhs).sum();
+ selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
}
};
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
+
+/***************************************************************************
+* Diagonal products
+***************************************************************************/
+
+template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
+struct diagonal_product_evaluator_base
+ : evaluator_base<Derived>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
+ typedef typename MatrixType::Index Index;
+ typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+public:
+ enum {
+ CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+
+ MatrixFlags = evaluator<MatrixType>::Flags,
+ DiagFlags = evaluator<DiagonalType>::Flags,
+ _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
+ _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+ ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+ _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+ // FIXME currently we need same types, but in the future the next rule should be the one
+ //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
+ _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
+ //(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
+ };
+
+ diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
+ : m_diagImpl(diag), m_matImpl(mat)
{
- etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
}
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
+ {
+ return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+ }
+
+protected:
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
+ {
+ return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
+ internal::pset1<PacketScalar>(m_diagImpl.coeff(id)));
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
+ {
+ enum {
+ InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+ DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
+ };
+ return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
+ m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
+ }
+
+ typename evaluator<DiagonalType>::nestedType m_diagImpl;
+ typename evaluator<MatrixType>::nestedType m_matImpl;
};
-/*******************
-*** Packet path ***
-*******************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+// diagonal * dense
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar>
+ : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
+ using Base::m_diagImpl;
+ using Base::m_matImpl;
+ using Base::coeff;
+ using Base::packet_impl;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::PacketScalar PacketScalar;
+
+ typedef Product<Lhs, Rhs, ProductKind> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+
+ enum {
+ StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+ };
+
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.rhs(), xpr.lhs().diagonal())
{
- etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
- res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
}
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
+ }
+
+#ifndef __CUDACC__
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+ {
+ // NVCC complains about the template keyword, so we disable this function in CUDA mode
+ return this->template packet_impl<LoadMode>(row,col, row,
+ typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
+ {
+ return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+#endif
};
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+// dense * diagonal
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar>
+ : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
+ using Base::m_diagImpl;
+ using Base::m_matImpl;
+ using Base::coeff;
+ using Base::packet_impl;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::PacketScalar PacketScalar;
+
+ typedef Product<Lhs, Rhs, ProductKind> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+
+ enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
+
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.lhs(), xpr.rhs().diagonal())
{
- etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
- res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
}
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
+ }
+
+#ifndef __CUDACC__
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+ {
+ return this->template packet_impl<LoadMode>(row,col, col,
+ typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
+ }
+
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
+ {
+ return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+#endif
};
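+
+// Net effect of the two diagonal evaluators above (hypothetical sketch):
+//   VectorXf d(n); MatrixXf M(n,n);
+//   (d.asDiagonal() * M)(i,j) == d(i) * M(i,j)  // diagonal * dense
+//   (M * d.asDiagonal())(i,j) == M(i,j) * d(j)  // dense * diagonal
+// Both products stay lazy: only the requested coefficients (or packets) are formed.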
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+/***************************************************************************
+* Products with permutation matrices
+***************************************************************************/
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
- res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+ permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs);
+ pmpr.evalTo(dst);
}
};
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{
- res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+ permut_matrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs);
+ pmpr.evalTo(dst);
}
};
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, DenseShape, ProductTag>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
{
- eigen_assert(innerDim>0 && "you are using a non initialized matrix");
- res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
- for(Index i = 1; i < innerDim; ++i)
- res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
+ permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs);
+ pmpr.evalTo(dst);
}
};
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Transpose<Rhs>, DenseShape, PermutationShape, ProductTag>
{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
{
- eigen_assert(innerDim>0 && "you are using a non initialized matrix");
- res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
- for(Index i = 1; i < innerDim; ++i)
- res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+ permut_matrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs);
+ pmpr.evalTo(dst);
}
};
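+
+// Net effect (hypothetical sketch): with PermutationMatrix<Dynamic> P and MatrixXf M,
+//   P * M             // permutes the rows of M
+//   M * P             // permutes the columns of M
+//   P.transpose() * M // applies the inverse row permutation (the Transpose cases)
+// each forwarding to permut_matrix_product_retval::evalTo on the destination.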
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index c626946ba..f6546917e 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -65,6 +65,25 @@ public:
? CompleteUnrolling
: NoUnrolling
};
+
+#ifdef EIGEN_DEBUG_ASSIGN
+ static void debug()
+ {
+ std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
+ std::cerr.setf(std::ios::hex, std::ios::basefield);
+ EIGEN_DEBUG_VAR(Derived::Flags)
+ std::cerr.unsetf(std::ios::hex);
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ EIGEN_DEBUG_VAR(Traversal)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(Unrolling)
+ std::cerr << std::endl;
+ }
+#endif
};
/***************************************************************************
@@ -174,7 +193,7 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
typedef typename Derived::Scalar Scalar;
typedef typename Derived::Index Index;
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
Scalar res;
@@ -200,10 +219,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename Derived::Index Index;
- static Scalar run(const Derived& mat, const Func& func)
+ static Scalar run(const Derived &mat, const Func& func)
{
const Index size = mat.size();
- eigen_assert(size && "you are using an empty matrix");
+
const Index packetSize = packet_traits<Scalar>::size;
const Index alignedStart = internal::first_aligned(mat);
enum {
@@ -258,7 +277,7 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
typedef typename packet_traits<Scalar>::type PacketScalar;
typedef typename Derived::Index Index;
- static Scalar run(const Derived& mat, const Func& func)
+ EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
const Index innerSize = mat.innerSize();
@@ -300,7 +319,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
Size = Derived::SizeAtCompileTime,
VectorizedSize = (Size / PacketSize) * PacketSize
};
- static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
{
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
if (VectorizedSize > 0) {
@@ -315,6 +334,66 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
}
};
+// evaluator adaptor
+template<typename _XprType>
+class redux_evaluator
+{
+public:
+ typedef _XprType XprType;
+ EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+
+ enum {
+ MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
+ // TODO we should not remove DirectAccessBit but rather find an elegant way to query the alignment offset at runtime from the evaluator
+ Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
+ IsRowMajor = XprType::IsRowMajor,
+ SizeAtCompileTime = XprType::SizeAtCompileTime,
+ InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
+ CoeffReadCost = evaluator<XprType>::CoeffReadCost
+ };
+
+ EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+ EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
+ EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
+ EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
+
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeff(Index row, Index col) const
+ { return m_evaluator.coeff(row, col); }
+
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeff(Index index) const
+ { return m_evaluator.coeff(index); }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ { return m_evaluator.template packet<LoadMode>(row, col); }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ { return m_evaluator.template packet<LoadMode>(index); }
+
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+ { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+
+ template<int LoadMode>
+ PacketReturnType packetByOuterInner(Index outer, Index inner) const
+ { return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+
+protected:
+ typename internal::evaluator<XprType>::nestedType m_evaluator;
+ const XprType &m_xpr;
+};
+
} // end namespace internal
/***************************************************************************
@@ -325,7 +404,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
*
* The template parameter \a BinaryOp is the type of the functor \a func which must be
- * an associative operator. Both current STL and TR1 functor styles are handled.
+ * an associative operator. Both current C++98 and C++11 functor styles are handled.
*
* \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
*/
@@ -334,9 +413,22 @@ template<typename Func>
EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
DenseBase<Derived>::redux(const Func& func) const
{
- typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
- return internal::redux_impl<Func, ThisNested>
- ::run(derived(), func);
+ eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
+
+ // FIXME, eval_nest should be handled by redux_evaluator, however:
+ // - it is currently difficult to provide the right Flags since they are still handled by the expressions
+ // - handling it here might reduce the number of template instantiations
+// typedef typename internal::nested_eval<Derived,1>::type ThisNested;
+// typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
+// typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
+//
+// ThisNested thisNested(derived());
+// ThisEvaluator thisEval(thisNested);
+
+ typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
+ ThisEvaluator thisEval(derived());
+
+ return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}
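+
+// For instance, after this change the following two calls are equivalent
+// (hypothetical sketch):
+//   MatrixXf mat(3,3);
+//   float s1 = mat.sum();
+//   float s2 = mat.redux(Eigen::internal::scalar_sum_op<float>());
+// Both wrap the expression in a redux_evaluator and hand it to redux_impl::run.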
/** \returns the minimum of all coefficients of \c *this.
@@ -346,7 +438,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
- return this->redux(Eigen::internal::scalar_min_op<Scalar>());
+ return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
}
/** \returns the maximum of all coefficients of \c *this.
@@ -356,7 +448,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
- return this->redux(Eigen::internal::scalar_max_op<Scalar>());
+ return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
}
/** \returns the sum of all coefficients of *this
@@ -369,7 +461,7 @@ DenseBase<Derived>::sum() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(0);
- return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
+ return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
}
/** \returns the mean of all coefficients of *this
@@ -380,7 +472,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
- return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
+ return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
}
/** \returns the product of all coefficients of *this
@@ -396,7 +488,7 @@ DenseBase<Derived>::prod() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(1);
- return this->redux(Eigen::internal::scalar_product_op<Scalar>());
+ return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 92614c6e2..6e6adbd31 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -12,10 +12,6 @@
namespace Eigen {
-template<typename Derived> class RefBase;
-template<typename PlainObjectType, int Options = 0,
- typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
-
/** \class Ref
* \ingroup Core_Module
*
@@ -131,12 +127,12 @@ public:
typedef MapBase<Derived> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
- inline Index innerStride() const
+ EIGEN_DEVICE_FUNC inline Index innerStride() const
{
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
}
- inline Index outerStride() const
+ EIGEN_DEVICE_FUNC inline Index outerStride() const
{
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
: IsVectorAtCompileTime ? this->size()
@@ -144,7 +140,7 @@ public:
: this->rows();
}
- RefBase()
+ EIGEN_DEVICE_FUNC RefBase()
: Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
// Stride<> does not allow default ctor for Dynamic strides, so let's initialize it with dummy values:
m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
@@ -158,7 +154,7 @@ protected:
typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
template<typename Expression>
- void construct(Expression& expr)
+ EIGEN_DEVICE_FUNC void construct(Expression& expr)
{
if(PlainObjectType::RowsAtCompileTime==1)
{
@@ -188,6 +184,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
typedef internal::traits<Ref> Traits;
+ template<typename Derived>
+ EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr);
public:
typedef RefBase<Ref> Base;
@@ -196,20 +194,21 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived>
- inline Ref(PlainObjectBase<Derived>& expr,
- typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+ EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr)
{
- Base::construct(expr);
+ EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+ Base::construct(expr.derived());
}
template<typename Derived>
- inline Ref(const DenseBase<Derived>& expr,
- typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0,
- int = Derived::ThisConstantIsPrivateInPlainObjectBase)
+ EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
#else
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
#endif
{
+ EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+ EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+ EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
Base::construct(expr.const_cast_derived());
}
@@ -228,7 +227,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
template<typename Derived>
- inline Ref(const DenseBase<Derived>& expr)
+ EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
{
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
@@ -236,18 +235,27 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
construct(expr.derived(), typename Traits::template match<Derived>::type());
}
+ EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {
+ // the copy constructor shall not copy m_object, to avoid an unnecessary malloc and copy
+ }
+
+ template<typename OtherRef>
+ EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {
+ construct(other.derived(), typename Traits::template match<OtherRef>::type());
+ }
+
protected:
template<typename Expression>
- void construct(const Expression& expr,internal::true_type)
+ EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
{
Base::construct(expr);
}
template<typename Expression>
- void construct(const Expression& expr, internal::false_type)
+ EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
{
- m_object.lazyAssign(expr);
+ internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>());
Base::construct(m_object);
}
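+
+// i.e. when the expression cannot be mapped directly (the false_type overload), it is
+// first evaluated into the m_object member and the Ref then maps that copy.
+// Hypothetical sketch:
+//   MatrixXf A(4,4);
+//   Ref<const MatrixXf> r(A + A); // not directly mappable -> copied into m_object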
diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index dde86a834..3777049ee 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -53,8 +53,9 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
- Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
- CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+
+ // FIXME enable DirectAccess with negative strides?
+ Flags = IsRowMajor ? RowMajorBit : 0
};
};
}
@@ -68,6 +69,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
typedef typename internal::dense_xpr_base<Replicate>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
template<typename OriginalMatrixType>
inline explicit Replicate(const OriginalMatrixType& a_matrix)
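For reference, a small sketch of the replicate() expression whose traits are trimmed above (standard DenseBase API):

    Eigen::Vector3f v(1.f, 2.f, 3.f);
    Eigen::MatrixXf tiled  = v.replicate(1, 4);   // 3x4: four copies of v side by side
    Eigen::MatrixXf tiled2 = v.replicate<1,4>();  // compile-time factors variant
    // With Flags reduced to the RowMajorBit choice above, the expression has no
    // DirectAccess and is consumed coefficient-wise through its evaluator.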
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index 7834f6cbc..af01a5567 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -38,9 +38,10 @@ struct traits<ReturnByValue<Derived> >
* So internal::nested always gives the plain return matrix type.
*
* FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
+ * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
*/
template<typename Derived,int n,typename PlainObject>
-struct nested<ReturnByValue<Derived>, n, PlainObject>
+struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
{
typedef typename traits<Derived>::ReturnType type;
};
@@ -48,7 +49,7 @@ struct nested<ReturnByValue<Derived>, n, PlainObject>
} // end namespace internal
template<typename Derived> class ReturnByValue
- : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue<Derived> >::type
+ : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
{
public:
typedef typename internal::traits<Derived>::ReturnType ReturnType;
@@ -73,6 +74,7 @@ template<typename Derived> class ReturnByValue
const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
+#undef Unusable
#endif
};
@@ -84,6 +86,36 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
return derived();
}
+namespace internal {
+
+// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
+// when a ReturnByValue expression is assigned, the evaluator is not constructed.
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
+
+template<typename Derived>
+struct evaluator<ReturnByValue<Derived> >
+ : public evaluator<typename internal::traits<Derived>::ReturnType>::type
+{
+ typedef ReturnByValue<Derived> XprType;
+ typedef typename internal::traits<Derived>::ReturnType PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ xpr.evalTo(m_result);
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_RETURNBYVALUE_H
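As a concrete instance of the evaluator above: FullPivLU::kernel() returns a ReturnByValue expression in this era of the library, so it is materialized exactly once. A minimal sketch:

    Eigen::MatrixXd A(3,3);
    A << 1,2,3, 2,4,6, 1,1,1;            // rank 2
    Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
    Eigen::MatrixXd K = lu.kernel();     // the evaluator allocates m_result(xpr.rows(), xpr.cols())
                                         // and runs xpr.evalTo(m_result) once; K copies from that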
diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h
index e30ae3d28..291300a4a 100644
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -44,14 +44,7 @@ struct traits<Reverse<MatrixType, Direction> >
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-
- // let's enable LinearAccess only with vectorization because of the product overhead
- LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
- ? LinearAccessBit : 0,
-
- Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
-
- CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+ Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
};
};
@@ -74,6 +67,7 @@ template<typename MatrixType, int Direction> class Reverse
typedef typename internal::dense_xpr_base<Reverse>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
using Base::IsRowMajor;
// next line is necessary because otherwise const version of operator()
@@ -95,47 +89,47 @@ template<typename MatrixType, int Direction> class Reverse
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
public:
- inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
+ EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
- inline Index rows() const { return m_matrix.rows(); }
- inline Index cols() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
- inline Index innerStride() const
+ EIGEN_DEVICE_FUNC inline Index innerStride() const
{
return -m_matrix.innerStride();
}
- inline Scalar& operator()(Index row, Index col)
+ EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col)
{
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeffRef(row, col);
}
- inline Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
- inline CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
- inline CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(m_matrix.size() - index - 1);
}
- inline Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
}
- inline Scalar& operator()(Index index)
+ EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index)
{
eigen_assert(index >= 0 && index < m_matrix.size());
return coeffRef(index);
@@ -170,7 +164,7 @@ template<typename MatrixType, int Direction> class Reverse
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
- const typename internal::remove_all<typename MatrixType::Nested>::type&
+ EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
return m_matrix;
@@ -190,7 +184,7 @@ template<typename Derived>
inline typename DenseBase<Derived>::ReverseReturnType
DenseBase<Derived>::reverse()
{
- return derived();
+ return ReverseReturnType(derived());
}
/** This is the const version of reverse(). */
@@ -198,7 +192,7 @@ template<typename Derived>
inline const typename DenseBase<Derived>::ConstReverseReturnType
DenseBase<Derived>::reverse() const
{
- return derived();
+ return ConstReverseReturnType(derived());
}
/** This is the "in place" version of reverse: it reverses \c *this.
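Usage of the reverse() members touched above, for reference:

    Eigen::Vector4d v(1,2,3,4);
    Eigen::Vector4d w = v.reverse();  // (4,3,2,1); built as ReverseReturnType(derived())
                                      // now that the Reverse constructor is explicit
    v.reverseInPlace();               // the in-place variant documented just above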
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index 87993bbb5..79eec1b5b 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -43,23 +43,21 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
- Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
- CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
- + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
- traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
+ Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
};
};
}
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
-class Select : internal::no_assignment_operator,
- public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
+class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,
+ internal::no_assignment_operator
{
public:
typedef typename internal::dense_xpr_base<Select>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Select)
+ inline EIGEN_DEVICE_FUNC
Select(const ConditionMatrixType& a_conditionMatrix,
const ThenMatrixType& a_thenMatrix,
const ElseMatrixType& a_elseMatrix)
@@ -69,9 +67,10 @@ class Select : internal::no_assignment_operator,
eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
}
- Index rows() const { return m_condition.rows(); }
- Index cols() const { return m_condition.cols(); }
+ inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
+ inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
+ inline EIGEN_DEVICE_FUNC
const Scalar coeff(Index i, Index j) const
{
if (m_condition.coeff(i,j))
@@ -80,6 +79,7 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i,j);
}
+ inline EIGEN_DEVICE_FUNC
const Scalar coeff(Index i) const
{
if (m_condition.coeff(i))
@@ -88,17 +88,17 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i);
}
- const ConditionMatrixType& conditionMatrix() const
+ inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const
{
return m_condition;
}
- const ThenMatrixType& thenMatrix() const
+ inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const
{
return m_then;
}
- const ElseMatrixType& elseMatrix() const
+ inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const
{
return m_else;
}
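A short usage sketch of Select, matching the per-coefficient logic in coeff() above:

    Eigen::ArrayXf a = Eigen::ArrayXf::Random(5);
    Eigen::ArrayXf r = (a > 0.f).select(a, -a);  // coefficient-wise: condition ? then : else,
                                                 // i.e. an elementwise absolute value here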
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 6c2733650..b785e8e1e 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -35,26 +35,23 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType;
- typedef typename MatrixType::PlainObject DenseMatrixType;
+ typedef typename MatrixType::PlainObject FullMatrixType;
enum {
Mode = UpLo | SelfAdjoint,
- Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits)
- & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
- CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)
+ & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
};
};
}
-template <typename Lhs, int LhsMode, bool LhsIsVector,
- typename Rhs, int RhsMode, bool RhsIsVector>
-struct SelfadjointProductMatrix;
-
// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
-template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
- : public TriangularBase<SelfAdjointView<MatrixType, UpLo> >
+template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
+ : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
{
public:
+ typedef _MatrixType MatrixType;
typedef TriangularBase<SelfAdjointView> Base;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
@@ -65,12 +62,13 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
typedef typename MatrixType::Index Index;
enum {
- Mode = internal::traits<SelfAdjointView>::Mode
+ Mode = internal::traits<SelfAdjointView>::Mode,
+ Flags = internal::traits<SelfAdjointView>::Flags
};
typedef typename MatrixType::PlainObject PlainObject;
EIGEN_DEVICE_FUNC
- inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
+ explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{}
EIGEN_DEVICE_FUNC
@@ -98,6 +96,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col)
{
+ EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
Base::check_coordinates_internal(row, col);
return m_matrix.const_cast_derived().coeffRef(row, col);
}
@@ -111,26 +110,29 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
- /** Efficient self-adjoint matrix times vector/matrix product */
+ /** Efficient self-adjoint matrix times vector/matrix product */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
+ const Product<SelfAdjointView,OtherDerived>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
- return SelfadjointProductMatrix
- <MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
- (m_matrix, rhs.derived());
+ return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
}
- /** Efficient vector/matrix times self-adjoint matrix product */
+ /** Efficient vector/matrix times self-adjoint matrix product */
template<typename OtherDerived> friend
EIGEN_DEVICE_FUNC
- SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+ const Product<OtherDerived,SelfAdjointView>
operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
{
- return SelfadjointProductMatrix
- <OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
- (lhs.derived(),rhs.m_matrix);
+ return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
+ }
+
+ friend EIGEN_DEVICE_FUNC
+ const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo>
+ operator*(const Scalar& s, const SelfAdjointView& mat)
+ {
+ return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
}
/** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
@@ -194,96 +196,57 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
namespace internal {
-template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
-{
- enum {
- col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
- row = (UnrollCount-1) % Derived1::RowsAtCompileTime
- };
-
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
-
- if(row == col)
- dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
- else if(row < col)
- dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
- }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
+// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
+// in the future selfadjoint-ness should be defined by the expression traits
+// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &, const Derived2 &) {}
+ typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+ typedef SelfAdjointShape Shape;
+
+ static const int AssumeAliasing = 0;
};
-template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
+template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
+class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
+ : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
{
- enum {
- col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
- row = (UnrollCount-1) % Derived1::RowsAtCompileTime
- };
-
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
+protected:
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
+ typedef typename Base::DstXprType DstXprType;
+ typedef typename Base::SrcXprType SrcXprType;
+ using Base::m_dst;
+ using Base::m_src;
+ using Base::m_functor;
+public:
+
+ typedef typename Base::DstEvaluatorType DstEvaluatorType;
+ typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::AssignmentTraits AssignmentTraits;
+
+
+ EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+ : Base(dst, src, func, dstExpr)
+ {}
+
+ EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
{
- triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
-
- if(row == col)
- dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
- else if(row > col)
- dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
+ eigen_internal_assert(row!=col);
+ Scalar tmp = m_src.coeff(row,col);
+ m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
+ m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
}
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
-{
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
+
+ EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
{
- for(Index j = 0; j < dst.cols(); ++j)
- {
- for(Index i = 0; i < j; ++i)
- {
- dst.copyCoeff(i, j, src);
- dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
- }
- dst.copyCoeff(j, j, src);
- }
- }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
-{
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- typedef typename Derived1::Index Index;
- for(Index i = 0; i < dst.rows(); ++i)
- {
- for(Index j = 0; j < i; ++j)
- {
- dst.copyCoeff(i, j, src);
- dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
- }
- dst.copyCoeff(i, i, src);
- }
+ Base::assignCoeff(id,id);
}
+
+ EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
+ { eigen_internal_assert(false && "should never be called"); }
};
} // end namespace internal
@@ -297,7 +260,7 @@ template<unsigned int UpLo>
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView() const
{
- return derived();
+ return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
}
template<typename Derived>
@@ -305,7 +268,7 @@ template<unsigned int UpLo>
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView()
{
- return derived();
+ return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
}
} // end namespace Eigen
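To make the new kernel and product paths concrete, a sketch against the public selfadjointView() API (2.0 stands in for any Scalar value):

    Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(3,3);
    Eigen::MatrixXcd H = A.selfadjointView<Eigen::Lower>();
    // assignCoeff(row,col) above writes both H(row,col) = A(row,col) and
    // H(col,row) = conj(A(row,col)); assignDiagonalCoeff copies the diagonal as-is.
    Eigen::VectorXcd x = Eigen::VectorXcd::Random(3);
    Eigen::VectorXcd y = A.selfadjointView<Eigen::Lower>() * x;    // Product<SelfAdjointView,...>
    Eigen::MatrixXcd S = 2.0 * A.selfadjointView<Eigen::Lower>();  // the scalar*view overload added above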
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 8abdca4a5..38185d9d7 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -12,179 +12,11 @@
namespace Eigen {
-/** \class SelfCwiseBinaryOp
- * \ingroup Core_Module
- *
- * \internal
- *
- * \brief Internal helper class for optimizing operators like +=, -=
- *
- * This is a pseudo expression class re-implementing the copyCoeff/copyPacket
- * method to directly performs a +=/-= operations in an optimal way. In particular,
- * this allows to make sure that the input/output data are loaded only once using
- * aligned packet loads.
- *
- * \sa class SwapWrapper for a similar trick.
- */
-
-namespace internal {
-template<typename BinaryOp, typename Lhs, typename Rhs>
-struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
- : traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
-{
- enum {
- // Note that it is still a good idea to preserve the DirectAccessBit
- // so that assign can correctly align the data.
- Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
- OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
- InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
- };
-};
-}
-
-template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
- : public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
-{
- public:
-
- typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
-
- typedef typename internal::packet_traits<Scalar>::type Packet;
-
- EIGEN_DEVICE_FUNC
- inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
-
- EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
- EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
- EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
- EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
- EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
-
- // note that this function is needed by assign to correctly align loads/stores
- // TODO make Assign use .data()
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index row, Index col)
- {
- EIGEN_STATIC_ASSERT_LVALUE(Lhs)
- return m_matrix.const_cast_derived().coeffRef(row, col);
- }
- EIGEN_DEVICE_FUNC
- inline const Scalar& coeffRef(Index row, Index col) const
- {
- return m_matrix.coeffRef(row, col);
- }
-
- // note that this function is needed by assign to correctly align loads/stores
- // TODO make Assign use .data()
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index)
- {
- EIGEN_STATIC_ASSERT_LVALUE(Lhs)
- return m_matrix.const_cast_derived().coeffRef(index);
- }
- EIGEN_DEVICE_FUNC
- inline const Scalar& coeffRef(Index index) const
- {
- return m_matrix.const_cast_derived().coeffRef(index);
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- Scalar& tmp = m_matrix.coeffRef(row,col);
- tmp = m_functor(tmp, _other.coeff(row,col));
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(index >= 0 && index < m_matrix.size());
- Scalar& tmp = m_matrix.coeffRef(index);
- tmp = m_functor(tmp, _other.coeff(index));
- }
-
- template<typename OtherDerived, int StoreMode, int LoadMode>
- void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(row >= 0 && row < rows()
- && col >= 0 && col < cols());
- m_matrix.template writePacket<StoreMode>(row, col,
- m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
- }
-
- template<typename OtherDerived, int StoreMode, int LoadMode>
- void copyPacket(Index index, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(index >= 0 && index < m_matrix.size());
- m_matrix.template writePacket<StoreMode>(index,
- m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
- }
-
- // reimplement lazyAssign to handle complex *= real
- // see CwiseBinaryOp ctor for details
- template<typename RhsDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
- {
- EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
- EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
-
- #ifdef EIGEN_DEBUG_ASSIGN
- internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
- #endif
- eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
- internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
- #ifndef EIGEN_NO_DEBUG
- this->checkTransposeAliasing(rhs.derived());
- #endif
- return *this;
- }
-
- // overloaded to honor evaluation of special matrices
- // maybe another solution would be to not use SelfCwiseBinaryOp
- // at first...
- EIGEN_DEVICE_FUNC
- SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
- {
- typename internal::nested<Rhs>::type rhs(_rhs);
- return Base::operator=(rhs);
- }
-
- EIGEN_DEVICE_FUNC
- Lhs& expression() const
- {
- return m_matrix;
- }
-
- EIGEN_DEVICE_FUNC
- const BinaryOp& functor() const
- {
- return m_functor;
- }
-
- protected:
- Lhs& m_matrix;
- const BinaryOp& m_functor;
-
- private:
- SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
-};
-
template<typename Derived>
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
- SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(),other);
+ internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
return derived();
}
@@ -192,8 +24,7 @@ template<typename Derived>
inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
- SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(),other);
+ internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
return derived();
}
@@ -201,8 +32,7 @@ template<typename Derived>
inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
- SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(),other);
+ internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
return derived();
}
@@ -210,8 +40,7 @@ template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
- SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(), other);
+ internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
return derived();
}
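The user-visible semantics are unchanged; the scalar compound operators are simply lowered through the new assignment machinery. Sketch:

    Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 1.0, 4.0);
    a *= 2.0;  // call_assignment(a, Constant(rows,cols,2.0), mul_assign_op): one vectorized pass
    a += 1.0;  // add_assign_op
    a -= 0.5;  // sub_assign_op
    a /= 3.0;  // div_assign_op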
diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h
new file mode 100644
index 000000000..3905cd616
--- /dev/null
+++ b/Eigen/src/Core/Solve.h
@@ -0,0 +1,152 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVE_H
+#define EIGEN_SOLVE_H
+
+namespace Eigen {
+
+template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
+
+/** \class Solve
+ * \ingroup Core_Module
+ *
+ * \brief Pseudo expression representing a solving operation
+ *
+ * \tparam Decomposition the type of the matrix or decomposition object
+ * \tparam RhsType the type of the right-hand side
+ *
+ * This class represents an expression of A.solve(B)
+ * and most of the time this is the only way it is used.
+ *
+ */
+namespace internal {
+
+// this solve_traits class is used to determine the evaluation type with respect to storage kind (Dense vs Sparse)
+template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
+
+template<typename Decomposition, typename RhsType>
+struct solve_traits<Decomposition,RhsType,Dense>
+{
+ typedef typename Decomposition::MatrixType MatrixType;
+ typedef Matrix<typename RhsType::Scalar,
+ MatrixType::ColsAtCompileTime,
+ RhsType::ColsAtCompileTime,
+ RhsType::PlainObject::Options,
+ MatrixType::MaxColsAtCompileTime,
+ RhsType::MaxColsAtCompileTime> PlainObject;
+};
+
+template<typename Decomposition, typename RhsType>
+struct traits<Solve<Decomposition, RhsType> >
+ : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
+{
+ typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
+ typedef traits<PlainObject> BaseTraits;
+ enum {
+ Flags = BaseTraits::Flags & RowMajorBit,
+ CoeffReadCost = Dynamic
+ };
+};
+
+}
+
+
+template<typename Decomposition, typename RhsType>
+class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
+{
+public:
+ typedef typename RhsType::Index Index;
+ typedef typename internal::traits<Solve>::PlainObject PlainObject;
+
+ Solve(const Decomposition &dec, const RhsType &rhs)
+ : m_dec(dec), m_rhs(rhs)
+ {}
+
+ EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
+
+ EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
+ EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; }
+
+protected:
+ const Decomposition &m_dec;
+ const RhsType &m_rhs;
+};
+
+
+// Specialization of the Solve expression for dense results
+template<typename Decomposition, typename RhsType>
+class SolveImpl<Decomposition,RhsType,Dense>
+ : public MatrixBase<Solve<Decomposition,RhsType> >
+{
+ typedef Solve<Decomposition,RhsType> Derived;
+
+public:
+
+ typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+private:
+
+ Scalar coeff(Index row, Index col) const;
+ Scalar coeff(Index i) const;
+};
+
+// Generic API dispatcher
+template<typename Decomposition, typename RhsType, typename StorageKind>
+class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
+{
+ public:
+ typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
+};
+
+namespace internal {
+
+// Evaluator of Solve -> eval into a temporary
+template<typename Decomposition, typename RhsType>
+struct evaluator<Solve<Decomposition,RhsType> >
+ : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>::type
+{
+ typedef Solve<Decomposition,RhsType> SolveType;
+ typedef typename SolveType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
+ : m_result(solve.rows(), solve.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ solve.dec()._solve_impl(solve.rhs(), m_result);
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+// Specialization for "dst = dec.solve(rhs)"
+// NOTE we need to specialize it for Dense2Dense to avoid an ambiguous specialization error; a Sparse2Sparse specialization must exist somewhere
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Solve<DecType,RhsType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ // FIXME shall we resize dst here?
+ src.dec()._solve_impl(src.rhs(), dst);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVE_H
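A sketch of how a Solve expression typically arises and is consumed, assuming any decomposition exposing _solve_impl (PartialPivLU shown):

    Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
    Eigen::VectorXd b = Eigen::VectorXd::Random(4);
    Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);
    Eigen::VectorXd x = lu.solve(b);  // returns Solve<PartialPivLU<...>,VectorXd>; the
                                      // Dense2Dense Assignment above forwards directly to
                                      // lu._solve_impl(b, x), skipping the evaluator temporary
    double residual = (A*x - b).norm();  // ~0 for a well-conditioned A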
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index e158e3162..f97048bda 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -171,10 +171,10 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
*/
template<typename MatrixType, unsigned int Mode>
template<int Side, typename OtherDerived>
-void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{
OtherDerived& other = _other.const_cast_derived();
- eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
+ eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime };
@@ -183,7 +183,7 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived
OtherCopy otherCopy(other);
internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
- Side, Mode>::run(nestedExpression(), otherCopy);
+ Side, Mode>::run(derived().nestedExpression(), otherCopy);
if (copy)
other = otherCopy;
@@ -213,9 +213,9 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived
template<typename Derived, unsigned int Mode>
template<int Side, typename Other>
const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
-TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
+TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
{
- return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
+ return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
}
namespace internal {
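Usage sketch for the solve()/solveInPlace() members being moved onto TriangularViewImpl:

    Eigen::MatrixXd M = Eigen::MatrixXd::Random(4,4);
    M.diagonal().array() += 4.0;  // keep the triangular solve well conditioned
    Eigen::VectorXd b = Eigen::VectorXd::Random(4);
    Eigen::VectorXd x = M.triangularView<Eigen::Lower>().solve(b);  // triangular_solve_retval
    M.triangularView<Eigen::Lower>().solveInPlace(b);               // in place: b <- L^{-1} b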
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 64d43e1b1..0b7e39827 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -17,7 +17,6 @@ namespace internal {
template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{
- using std::max;
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
if(maxCoeff>scale)
@@ -58,8 +57,6 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow;
- EIGEN_USING_STD_MATH(min);
- EIGEN_USING_STD_MATH(max);
using std::sqrt;
using std::abs;
const Derived& vec(_vec.derived());
@@ -136,8 +133,8 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
}
else
return sqrt(amed);
- asml = (min)(abig, amed);
- abig = (max)(abig, amed);
+ asml = numext::mini(abig, amed);
+ abig = numext::maxi(abig, amed);
if(asml <= abig*relerr)
return abig;
else
@@ -160,7 +157,6 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
- EIGEN_USING_STD_MATH(min);
using std::sqrt;
const Index blockSize = 4096;
RealScalar scale(0);
@@ -174,7 +170,7 @@ MatrixBase<Derived>::stableNorm() const
if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
- internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
+ internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
return scale * sqrt(ssq);
}
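Why this kernel exists, in one sketch: stableNorm() survives ranges where plain norm() overflows, thanks to the scale/invScale rescaling above:

    Eigen::VectorXd v = Eigen::VectorXd::Constant(8, 1e200);
    double n1 = v.norm();        // squaring 1e200 overflows to inf
    double n2 = v.stableNorm();  // ~2.83e200: processed in blocks of 4096 coefficients,
                                 // each rescaled before squaring (numext::mini above)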
diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h
index d3d454e4e..e46faad34 100644
--- a/Eigen/src/Core/Stride.h
+++ b/Eigen/src/Core/Stride.h
@@ -86,26 +86,26 @@ class Stride
/** \brief Convenience specialization of Stride to specify only an inner stride
* See class Map for some examples */
-template<int Value = Dynamic>
+template<int Value>
class InnerStride : public Stride<0, Value>
{
typedef Stride<0, Value> Base;
public:
typedef DenseIndex Index;
EIGEN_DEVICE_FUNC InnerStride() : Base() {}
- EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
+ EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code
};
/** \brief Convenience specialization of Stride to specify only an outer stride
* See class Map for some examples */
-template<int Value = Dynamic>
+template<int Value>
class OuterStride : public Stride<Value, 0>
{
typedef Stride<Value, 0> Base;
public:
typedef DenseIndex Index;
EIGEN_DEVICE_FUNC OuterStride() : Base() {}
- EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
+ EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code
};
} // end namespace Eigen
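The typical client of these specializations is Map; a minimal sketch of the Index constructors kept non-explicit above:

    double data[8] = {0,1,2,3,4,5,6,7};
    // view a 2x2 matrix whose columns start 4 entries apart in 'data':
    Eigen::Map<Eigen::MatrixXd, 0, Eigen::OuterStride<> > m(data, 2, 2, Eigen::OuterStride<>(4));
    // m == [0 4; 1 5].  Because the constructor is non-explicit (see the FIXME),
    // a bare integer still converts where an OuterStride<> is expected.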
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index d602fba65..55319320a 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -12,129 +12,54 @@
namespace Eigen {
-/** \class SwapWrapper
- * \ingroup Core_Module
- *
- * \internal
- *
- * \brief Internal helper class for swapping two expressions
- */
namespace internal {
-template<typename ExpressionType>
-struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
-}
-template<typename ExpressionType> class SwapWrapper
- : public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type
+// Overload default assignPacket behavior for swapping them
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
+class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>
+ : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>
{
- public:
-
- typedef typename internal::dense_xpr_base<SwapWrapper>::type Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
- typedef typename internal::packet_traits<Scalar>::type Packet;
-
- EIGEN_DEVICE_FUNC
- inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
-
- EIGEN_DEVICE_FUNC
- inline Index rows() const { return m_expression.rows(); }
- EIGEN_DEVICE_FUNC
- inline Index cols() const { return m_expression.cols(); }
- EIGEN_DEVICE_FUNC
- inline Index outerStride() const { return m_expression.outerStride(); }
- EIGEN_DEVICE_FUNC
- inline Index innerStride() const { return m_expression.innerStride(); }
-
- typedef typename internal::conditional<
- internal::is_lvalue<ExpressionType>::value,
- Scalar,
- const Scalar
- >::type ScalarWithConstIfNotLvalue;
-
- EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
- EIGEN_DEVICE_FUNC
- inline const Scalar* data() const { return m_expression.data(); }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index rowId, Index colId)
- {
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index)
- {
- return m_expression.const_cast_derived().coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index rowId, Index colId) const
- {
- return m_expression.coeffRef(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index) const
- {
- return m_expression.coeffRef(index);
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(rowId >= 0 && rowId < rows()
- && colId >= 0 && colId < cols());
- Scalar tmp = m_expression.coeff(rowId, colId);
- m_expression.coeffRef(rowId, colId) = _other.coeff(rowId, colId);
- _other.coeffRef(rowId, colId) = tmp;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(index >= 0 && index < m_expression.size());
- Scalar tmp = m_expression.coeff(index);
- m_expression.coeffRef(index) = _other.coeff(index);
- _other.coeffRef(index) = tmp;
- }
-
- template<typename OtherDerived, int StoreMode, int LoadMode>
- void copyPacket(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(rowId >= 0 && rowId < rows()
- && colId >= 0 && colId < cols());
- Packet tmp = m_expression.template packet<StoreMode>(rowId, colId);
- m_expression.template writePacket<StoreMode>(rowId, colId,
- _other.template packet<LoadMode>(rowId, colId)
- );
- _other.template writePacket<LoadMode>(rowId, colId, tmp);
- }
-
- template<typename OtherDerived, int StoreMode, int LoadMode>
- void copyPacket(Index index, const DenseBase<OtherDerived>& other)
- {
- OtherDerived& _other = other.const_cast_derived();
- eigen_internal_assert(index >= 0 && index < m_expression.size());
- Packet tmp = m_expression.template packet<StoreMode>(index);
- m_expression.template writePacket<StoreMode>(index,
- _other.template packet<LoadMode>(index)
- );
- _other.template writePacket<LoadMode>(index, tmp);
- }
-
- EIGEN_DEVICE_FUNC
- ExpressionType& expression() const { return m_expression; }
-
- protected:
- ExpressionType& m_expression;
+protected:
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
+ typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
+ using Base::m_dst;
+ using Base::m_src;
+ using Base::m_functor;
+
+public:
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::DstXprType DstXprType;
+ typedef swap_assign_op<Scalar> Functor;
+
+ EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
+ : Base(dst, src, func, dstExpr)
+ {}
+
+ template<int StoreMode, int LoadMode>
+ void assignPacket(Index row, Index col)
+ {
+ m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
+ }
+
+ template<int StoreMode, int LoadMode>
+ void assignPacket(Index index)
+ {
+ m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
+ }
+
+ // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
+ template<int StoreMode, int LoadMode>
+ void assignPacketByOuterInner(Index outer, Index inner)
+ {
+ Index row = Base::rowIndexByOuterInner(outer, inner);
+ Index col = Base::colIndexByOuterInner(outer, inner);
+ assignPacket<StoreMode,LoadMode>(row, col);
+ }
};
+} // namespace internal
+
} // end namespace Eigen
#endif // EIGEN_SWAP_H
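User-visible effect: expression-level swap now runs through this kernel rather than the removed SwapWrapper. Sketch:

    Eigen::MatrixXf a = Eigen::MatrixXf::Random(8,8);
    Eigen::MatrixXf b = Eigen::MatrixXf::Random(8,8);
    a.col(0).swap(a.col(1));          // assignPacket above exchanges whole packets in one pass
    a.topRows(4).swap(b.topRows(4));  // via swap_assign_op::swapPacket on both operands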
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index aba3f6670..3bab6092c 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -29,9 +29,10 @@ namespace Eigen {
namespace internal {
template<typename MatrixType>
-struct traits<Transpose<MatrixType> > : traits<MatrixType>
+struct traits<Transpose<MatrixType> >
{
- typedef typename MatrixType::Scalar Scalar;
+ typedef typename traits<MatrixType>::Scalar Scalar;
+ typedef typename traits<MatrixType>::Index Index;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
typedef typename traits<MatrixType>::StorageKind StorageKind;
@@ -45,7 +46,6 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType>
Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
Flags1 = Flags0 | FlagsLvalueBit,
Flags = Flags1 ^ RowMajorBit,
- CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
};
@@ -61,9 +61,10 @@ template<typename MatrixType> class Transpose
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
EIGEN_DEVICE_FUNC
- inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
+ explicit inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
@@ -100,12 +101,22 @@ struct TransposeImpl_base<MatrixType, false>
} // end namespace internal
+// Generic API dispatcher
+template<typename XprType, typename StorageKind>
+class TransposeImpl
+ : public internal::generic_xpr_base<Transpose<XprType> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
+};
+
template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
: public internal::TransposeImpl_base<MatrixType>::type
{
public:
typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
+ using Base::coeffRef;
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
@@ -118,23 +129,10 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
const Scalar
>::type ScalarWithConstIfNotLvalue;
- inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
- inline const Scalar* data() const { return derived().nestedExpression().data(); }
-
- EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
- {
- EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
- return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
- }
-
- EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
- {
- EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
- return derived().nestedExpression().const_cast_derived().coeffRef(index);
- }
+ EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
+ // FIXME: shall we keep the const version of coeffRef?
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
@@ -146,42 +144,6 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
{
return derived().nestedExpression().coeffRef(index);
}
-
- EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index rowId, Index colId) const
- {
- return derived().nestedExpression().coeff(colId, rowId);
- }
-
- EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index index) const
- {
- return derived().nestedExpression().coeff(index);
- }
-
- template<int LoadMode>
- inline const PacketScalar packet(Index rowId, Index colId) const
- {
- return derived().nestedExpression().template packet<LoadMode>(colId, rowId);
- }
-
- template<int LoadMode>
- inline void writePacket(Index rowId, Index colId, const PacketScalar& x)
- {
- derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(colId, rowId, x);
- }
-
- template<int LoadMode>
- inline const PacketScalar packet(Index index) const
- {
- return derived().nestedExpression().template packet<LoadMode>(index);
- }
-
- template<int LoadMode>
- inline void writePacket(Index index, const PacketScalar& x)
- {
- derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
- }
};
/** \returns an expression of the transpose of *this.
@@ -207,7 +169,7 @@ template<typename Derived>
inline Transpose<Derived>
DenseBase<Derived>::transpose()
{
- return derived();
+ return TransposeReturnType(derived());
}
/** This is the const version of transpose().
@@ -245,8 +207,7 @@ template<typename Derived>
inline const typename MatrixBase<Derived>::AdjointReturnType
MatrixBase<Derived>::adjoint() const
{
- return this->transpose(); // in the complex case, the .conjugate() is be implicit here
- // due to implicit conversion to return type
+ return AdjointReturnType(this->transpose());
}
/***************************************************************************
@@ -256,18 +217,39 @@ MatrixBase<Derived>::adjoint() const
namespace internal {
template<typename MatrixType,
- bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
+ bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
+ bool MatchPacketSize =
+ (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
+ && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
struct inplace_transpose_selector;
template<typename MatrixType>
-struct inplace_transpose_selector<MatrixType,true> { // square matrix
+struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
static void run(MatrixType& m) {
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
}
};
+// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
template<typename MatrixType>
-struct inplace_transpose_selector<MatrixType,false> { // non square matrix
+struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
+ static void run(MatrixType& m) {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
+ typedef typename MatrixType::Index Index;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+ const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
+ PacketBlock<Packet> A;
+ for (Index i=0; i<PacketSize; ++i)
+ A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
+ internal::ptranspose(A);
+ for (Index i=0; i<PacketSize; ++i)
+ m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
+ }
+};
+
+template<typename MatrixType,bool MatchPacketSize>
+struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
static void run(MatrixType& m) {
if (m.rows()==m.cols())
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
@@ -413,15 +395,15 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
}
};
-} // end namespace internal
-
-template<typename Derived>
-template<typename OtherDerived>
-void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
+template<typename Dst, typename Src>
+void check_for_aliasing(const Dst &dst, const Src &src)
{
- internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
+ internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
}
-#endif
+
+} // end namespace internal
+
+#endif // EIGEN_NO_DEBUG
} // end namespace Eigen
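The new vectorized in-place path fires for LargestPacketSize-square matrices; a sketch assuming 4-float packets (SSE-style):

    Eigen::Matrix4f m = Eigen::Matrix4f::Random();
    m.transposeInPlace();  // 4x4 float matches the packet size: the selector loads four
                           // packets, runs internal::ptranspose, and writes them back
    Eigen::MatrixXf r = Eigen::MatrixXf::Random(3,5);
    r.transposeInPlace();  // non-square specialization handles the general (resizing) case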
diff --git a/Eigen/src/Core/Transpositions.h b/Eigen/src/Core/Transpositions.h
index 92261118f..77e7d6f45 100644
--- a/Eigen/src/Core/Transpositions.h
+++ b/Eigen/src/Core/Transpositions.h
@@ -240,7 +240,7 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexTyp
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
- inline Map(const StorageIndexType* indicesPtr)
+ explicit inline Map(const StorageIndexType* indicesPtr)
: m_indices(indicesPtr)
{}
@@ -299,7 +299,7 @@ class TranspositionsWrapper
typedef typename IndicesType::Scalar StorageIndexType;
typedef typename IndicesType::Index Index;
- inline TranspositionsWrapper(IndicesType& a_indices)
+ explicit inline TranspositionsWrapper(IndicesType& a_indices)
: m_indices(a_indices)
{}
@@ -414,7 +414,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
typedef typename TranspositionType::IndicesType IndicesType;
public:
- Transpose(const TranspositionType& t) : m_transpositions(t) {}
+ explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
inline int size() const { return m_transpositions.size(); }
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 72792d21b..cf0255bce 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -32,17 +32,23 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
enum {
Mode = internal::traits<Derived>::Mode,
- CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime
+ MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+
+ SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime>::ret)
+ /**< This is equal to the number of coefficients, i.e. the number of
+ * rows times the number of columns, or to \a Dynamic if this is not
+ * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
};
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index;
- typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
+ typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType;
typedef DenseMatrixType DenseType;
+ typedef Derived const& Nested;
EIGEN_DEVICE_FUNC
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
@@ -55,6 +61,14 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
inline Index outerStride() const { return derived().outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return derived().innerStride(); }
+
+ // dummy resize function
+ void resize(Index nbRows, Index nbCols)
+ {
+ EIGEN_UNUSED_VARIABLE(nbRows);
+ EIGEN_UNUSED_VARIABLE(nbCols);
+ eigen_assert(nbRows==rows() && nbCols==cols());
+ }
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
@@ -155,96 +169,209 @@ struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+ typedef typename MatrixType::PlainObject FullMatrixType;
typedef MatrixType ExpressionType;
- typedef typename MatrixType::PlainObject DenseMatrixType;
enum {
Mode = _Mode,
- Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
- CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))
};
};
}
-template<int Mode, bool LhsIsTriangular,
- typename Lhs, bool LhsIsVector,
- typename Rhs, bool RhsIsVector>
-struct TriangularProduct;
+template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;
template<typename _MatrixType, unsigned int _Mode> class TriangularView
- : public TriangularBase<TriangularView<_MatrixType, _Mode> >
+ : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >
{
public:
- typedef TriangularBase<TriangularView> Base;
+ typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;
typedef typename internal::traits<TriangularView>::Scalar Scalar;
-
typedef _MatrixType MatrixType;
- typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType;
- typedef DenseMatrixType PlainObject;
protected:
typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
- typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
public:
- using Base::evalToLazy;
-
typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
typedef typename internal::traits<TriangularView>::Index Index;
+ typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;
enum {
Mode = _Mode,
+ Flags = internal::traits<TriangularView>::Flags,
TransposeMode = (Mode & Upper ? Lower : 0)
| (Mode & Lower ? Upper : 0)
| (Mode & (UnitDiag))
- | (Mode & (ZeroDiag))
+ | (Mode & (ZeroDiag)),
+ IsVectorAtCompileTime = false
};
+ // FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole
EIGEN_DEVICE_FUNC
- inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
+ explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{}
+
+ using Base::operator=;
+ TriangularView& operator=(const TriangularView &other)
+ { return Base::operator=(other); }
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols(); }
+
+ EIGEN_DEVICE_FUNC
+ const NestedExpression& nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC
+ NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
+
+ /** \sa MatrixBase::conjugate() const */
+ typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+ EIGEN_DEVICE_FUNC
+ inline const ConjugateReturnType conjugate() const
+ { return ConjugateReturnType(m_matrix.conjugate()); }
+
+ typedef TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
+ /** \sa MatrixBase::adjoint() const */
+ EIGEN_DEVICE_FUNC
+ inline const AdjointReturnType adjoint() const
+ { return AdjointReturnType(m_matrix.adjoint()); }
+
+ typedef TriangularView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
+ /** \sa MatrixBase::transpose() */
+ EIGEN_DEVICE_FUNC
+ inline TransposeReturnType transpose()
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+ typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived());
+ return TransposeReturnType(tmp);
+ }
+
+ typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
+ /** \sa MatrixBase::transpose() const */
EIGEN_DEVICE_FUNC
- inline Index outerStride() const { return m_matrix.outerStride(); }
+ inline const ConstTransposeReturnType transpose() const
+ {
+ return ConstTransposeReturnType(m_matrix.transpose());
+ }
+
+ template<typename Other>
EIGEN_DEVICE_FUNC
- inline Index innerStride() const { return m_matrix.innerStride(); }
+ inline const Solve<TriangularView, Other>
+ solve(const MatrixBase<Other>& other) const
+ { return Solve<TriangularView, Other>(*this, other.derived()); }
+
+ // workaround MSVC ICE
+ #if EIGEN_COMP_MSVC
+ template<int Side, typename Other>
+ EIGEN_DEVICE_FUNC
+ inline const internal::triangular_solve_retval<Side,TriangularView, Other>
+ solve(const MatrixBase<Other>& other) const
+ { return Base::template solve<Side>(other); }
+ #else
+ using Base::solve;
+ #endif
+
+ EIGEN_DEVICE_FUNC
+ const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
+ {
+ EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+ return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+ }
+ EIGEN_DEVICE_FUNC
+ SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
+ {
+ EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+ return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+ }
+
+ EIGEN_DEVICE_FUNC
+ Scalar determinant() const
+ {
+ if (Mode & UnitDiag)
+ return 1;
+ else if (Mode & ZeroDiag)
+ return 0;
+ else
+ return m_matrix.diagonal().prod();
+ }
+
+ protected:
+
+ MatrixTypeNested m_matrix;
+};
+
+template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
+ : public TriangularBase<TriangularView<_MatrixType, _Mode> >
+{
+ public:
+
+ typedef TriangularView<_MatrixType, _Mode> TriangularViewType;
+ typedef TriangularBase<TriangularViewType> Base;
+ typedef typename internal::traits<TriangularViewType>::Scalar Scalar;
+
+ typedef _MatrixType MatrixType;
+ typedef typename MatrixType::PlainObject DenseMatrixType;
+ typedef DenseMatrixType PlainObject;
+
+ public:
+ using Base::evalToLazy;
+ using Base::derived;
+
+ typedef typename internal::traits<TriangularViewType>::StorageKind StorageKind;
+ typedef typename internal::traits<TriangularViewType>::Index Index;
+
+ enum {
+ Mode = _Mode,
+ Flags = internal::traits<TriangularViewType>::Flags
+ };
+
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
/** \sa MatrixBase::operator+=() */
template<typename Other>
EIGEN_DEVICE_FUNC
- TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
+ TriangularViewType& operator+=(const DenseBase<Other>& other) {
+ internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
+ return derived();
+ }
/** \sa MatrixBase::operator-=() */
template<typename Other>
EIGEN_DEVICE_FUNC
- TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
+ TriangularViewType& operator-=(const DenseBase<Other>& other) {
+ internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
+ return derived();
+ }
+
/** \sa MatrixBase::operator*=() */
EIGEN_DEVICE_FUNC
- TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
+ TriangularViewType& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
/** \sa MatrixBase::operator/=() */
EIGEN_DEVICE_FUNC
- TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
+ TriangularViewType& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }
/** \sa MatrixBase::fill() */
EIGEN_DEVICE_FUNC
void fill(const Scalar& value) { setConstant(value); }
/** \sa MatrixBase::setConstant() */
EIGEN_DEVICE_FUNC
- TriangularView& setConstant(const Scalar& value)
- { return *this = MatrixType::Constant(rows(), cols(), value); }
+ TriangularViewType& setConstant(const Scalar& value)
+ { return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); }
/** \sa MatrixBase::setZero() */
EIGEN_DEVICE_FUNC
- TriangularView& setZero() { return setConstant(Scalar(0)); }
+ TriangularViewType& setZero() { return setConstant(Scalar(0)); }
/** \sa MatrixBase::setOnes() */
EIGEN_DEVICE_FUNC
- TriangularView& setOnes() { return setConstant(Scalar(1)); }
+ TriangularViewType& setOnes() { return setConstant(Scalar(1)); }
/** \sa MatrixBase::coeff()
* \warning the coordinates must fit into the referenced triangular part
@@ -253,7 +380,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline Scalar coeff(Index row, Index col) const
{
Base::check_coordinates_internal(row, col);
- return m_matrix.coeff(row, col);
+ return derived().nestedExpression().coeff(row, col);
}
/** \sa MatrixBase::coeffRef()
@@ -262,27 +389,23 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col)
{
+ EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
Base::check_coordinates_internal(row, col);
- return m_matrix.const_cast_derived().coeffRef(row, col);
+ return derived().nestedExpression().const_cast_derived().coeffRef(row, col);
}
- EIGEN_DEVICE_FUNC
- const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
- EIGEN_DEVICE_FUNC
- MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
-
/** Assigns a triangular matrix to a triangular part of a dense matrix */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- TriangularView& operator=(const TriangularBase<OtherDerived>& other);
+ TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- TriangularView& operator=(const MatrixBase<OtherDerived>& other);
+ TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
EIGEN_DEVICE_FUNC
- TriangularView& operator=(const TriangularView& other)
- { return *this = other.nestedExpression(); }
+ TriangularViewType& operator=(const TriangularViewImpl& other)
+ { return *this = other.derived().nestedExpression(); }
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
@@ -290,378 +413,88 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- void lazyAssign(const MatrixBase<OtherDerived>& other);
-
- /** \sa MatrixBase::conjugate() */
- EIGEN_DEVICE_FUNC
- inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
- { return m_matrix.conjugate(); }
- /** \sa MatrixBase::conjugate() const */
- EIGEN_DEVICE_FUNC
- inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
- { return m_matrix.conjugate(); }
-
- /** \sa MatrixBase::adjoint() const */
- EIGEN_DEVICE_FUNC
- inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
- { return m_matrix.adjoint(); }
-
- /** \sa MatrixBase::transpose() */
- EIGEN_DEVICE_FUNC
- inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
- {
- EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
- return m_matrix.const_cast_derived().transpose();
- }
- /** \sa MatrixBase::transpose() const */
- EIGEN_DEVICE_FUNC
- inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
- {
- return m_matrix.transpose();
- }
+ void lazyAssign(const MatrixBase<OtherDerived>& other);
/** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
+ const Product<TriangularViewType,OtherDerived>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
- return TriangularProduct
- <Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
- (m_matrix, rhs.derived());
+ return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived());
}
/** Efficient vector/matrix times triangular matrix product */
template<typename OtherDerived> friend
EIGEN_DEVICE_FUNC
- TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
- operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
+ const Product<OtherDerived,TriangularViewType>
+ operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs)
{
- return TriangularProduct
- <Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
- (lhs.derived(),rhs.m_matrix);
+ return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());
}
template<int Side, typename Other>
EIGEN_DEVICE_FUNC
- inline const internal::triangular_solve_retval<Side,TriangularView, Other>
+ inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>
solve(const MatrixBase<Other>& other) const;
template<int Side, typename OtherDerived>
EIGEN_DEVICE_FUNC
void solveInPlace(const MatrixBase<OtherDerived>& other) const;
- template<typename Other>
- EIGEN_DEVICE_FUNC
- inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
- solve(const MatrixBase<Other>& other) const
- { return solve<OnTheLeft>(other); }
-
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void solveInPlace(const MatrixBase<OtherDerived>& other) const
{ return solveInPlace<OnTheLeft>(other); }
- EIGEN_DEVICE_FUNC
- const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
- {
- EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
- return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
- }
- EIGEN_DEVICE_FUNC
- SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
- {
- EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
- return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
- }
-
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(TriangularBase<OtherDerived> const & other)
{
- TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
+ call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
}
+ // TODO: this overload is ambiguous and it should be deprecated (Gael)
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(MatrixBase<OtherDerived> const & other)
{
- SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
- TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
+ call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
}
+ template<typename RhsType, typename DstType>
EIGEN_DEVICE_FUNC
- Scalar determinant() const
- {
- if (Mode & UnitDiag)
- return 1;
- else if (Mode & ZeroDiag)
- return 0;
- else
- return m_matrix.diagonal().prod();
- }
-
- // TODO simplify the following:
- template<typename ProductDerived, typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
- {
- setZero();
- return assignProduct(other,1);
- }
-
- template<typename ProductDerived, typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
- {
- return assignProduct(other,1);
- }
-
- template<typename ProductDerived, typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
- {
- return assignProduct(other,-1);
- }
-
-
- template<typename ProductDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
- {
- setZero();
- return assignProduct(other,other.alpha());
- }
-
- template<typename ProductDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
- {
- return assignProduct(other,other.alpha());
- }
-
- template<typename ProductDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
- {
- return assignProduct(other,-other.alpha());
+ EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
+ if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))
+ dst = rhs;
+ this->solveInPlace(dst);
}
-
- protected:
-
- template<typename ProductDerived, typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
- MatrixTypeNested m_matrix;
+ template<typename ProductType>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha);
};
/***************************************************************************
* Implementation of triangular evaluation/assignment
***************************************************************************/
-namespace internal {
-
-template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite>
-struct triangular_assignment_selector
-{
- enum {
- col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
- row = (UnrollCount-1) % Derived1::RowsAtCompileTime
- };
-
- typedef typename Derived1::Scalar Scalar;
-
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
-
- eigen_assert( Mode == Upper || Mode == Lower
- || Mode == StrictlyUpper || Mode == StrictlyLower
- || Mode == UnitUpper || Mode == UnitLower);
- if((Mode == Upper && row <= col)
- || (Mode == Lower && row >= col)
- || (Mode == StrictlyUpper && row < col)
- || (Mode == StrictlyLower && row > col)
- || (Mode == UnitUpper && row < col)
- || (Mode == UnitLower && row > col))
- dst.copyCoeff(row, col, src);
- else if(ClearOpposite)
- {
- if (Mode&UnitDiag && row==col)
- dst.coeffRef(row, col) = Scalar(1);
- else
- dst.coeffRef(row, col) = Scalar(0);
- }
- }
-};
-
-// prevent buggy user code from causing an infinite recursion
-template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
-{
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- typedef typename Derived1::Scalar Scalar;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- for(Index j = 0; j < dst.cols(); ++j)
- {
- Index maxi = (std::min)(j, dst.rows()-1);
- for(Index i = 0; i <= maxi; ++i)
- dst.copyCoeff(i, j, src);
- if (ClearOpposite)
- for(Index i = maxi+1; i < dst.rows(); ++i)
- dst.coeffRef(i, j) = Scalar(0);
- }
- }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- for(Index j = 0; j < dst.cols(); ++j)
- {
- for(Index i = j; i < dst.rows(); ++i)
- dst.copyCoeff(i, j, src);
- Index maxi = (std::min)(j, dst.rows());
- if (ClearOpposite)
- for(Index i = 0; i < maxi; ++i)
- dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
- }
- }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- typedef typename Derived1::Scalar Scalar;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- for(Index j = 0; j < dst.cols(); ++j)
- {
- Index maxi = (std::min)(j, dst.rows());
- for(Index i = 0; i < maxi; ++i)
- dst.copyCoeff(i, j, src);
- if (ClearOpposite)
- for(Index i = maxi; i < dst.rows(); ++i)
- dst.coeffRef(i, j) = Scalar(0);
- }
- }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- for(Index j = 0; j < dst.cols(); ++j)
- {
- for(Index i = j+1; i < dst.rows(); ++i)
- dst.copyCoeff(i, j, src);
- Index maxi = (std::min)(j, dst.rows()-1);
- if (ClearOpposite)
- for(Index i = 0; i <= maxi; ++i)
- dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
- }
- }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- for(Index j = 0; j < dst.cols(); ++j)
- {
- Index maxi = (std::min)(j, dst.rows());
- for(Index i = 0; i < maxi; ++i)
- dst.copyCoeff(i, j, src);
- if (ClearOpposite)
- {
- for(Index i = maxi+1; i < dst.rows(); ++i)
- dst.coeffRef(i, j) = 0;
- }
- }
- dst.diagonal().setOnes();
- }
-};
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
-{
- typedef typename Derived1::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(Derived1 &dst, const Derived2 &src)
- {
- for(Index j = 0; j < dst.cols(); ++j)
- {
- Index maxi = (std::min)(j, dst.rows());
- for(Index i = maxi+1; i < dst.rows(); ++i)
- dst.copyCoeff(i, j, src);
- if (ClearOpposite)
- {
- for(Index i = 0; i < maxi; ++i)
- dst.coeffRef(i, j) = 0;
- }
- }
- dst.diagonal().setOnes();
- }
-};
-
-} // end namespace internal
-
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
-TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& other)
+TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{
- if(OtherDerived::Flags & EvalBeforeAssigningBit)
- {
- typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
- other_evaluated.template triangularView<Mode>().lazyAssign(other.derived());
- lazyAssign(other_evaluated);
- }
- else
- lazyAssign(other.derived());
- return *this;
+ internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar>());
+ return derived();
}
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
-void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>& other)
+void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
{
- enum {
- unroll = MatrixType::SizeAtCompileTime != Dynamic
- && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
- && MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
- };
- eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
-
- internal::triangular_assignment_selector
- <MatrixType, OtherDerived, int(Mode),
- unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
- false // do not change the opposite triangular part
- >::run(m_matrix.const_cast_derived(), other.derived());
+ internal::call_assignment(derived().noalias(), other.template triangularView<Mode>());
}
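
Behavior preserved by the rewrite: a dense-to-triangular copy still writes only the referenced triangle, it merely routes through the new call_assignment machinery instead of the unrolled selector. User-side sketch:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Matrix3d a = Eigen::Matrix3d::Zero();
      Eigen::Matrix3d b = Eigen::Matrix3d::Constant(7.0);
      a.triangularView<Eigen::Upper>() = b;  // copies only b's upper triangle
      std::cout << a << std::endl;           // strictly lower part of a stays zero
      return 0;
    }
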
@@ -669,37 +502,19 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
-TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other)
+TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)
{
eigen_assert(Mode == int(OtherDerived::Mode));
- if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit)
- {
- typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols());
- other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression());
- lazyAssign(other_evaluated);
- }
- else
- lazyAssign(other.derived().nestedExpression());
- return *this;
+ internal::call_assignment(derived(), other.derived());
+ return derived();
}
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
-void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDerived>& other)
+void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
{
- enum {
- unroll = MatrixType::SizeAtCompileTime != Dynamic
- && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
- && MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2
- <= EIGEN_UNROLLING_LIMIT
- };
- eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
-
- internal::triangular_assignment_selector
- <MatrixType, OtherDerived, int(Mode),
- unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
- false // preserve the opposite triangular part
- >::run(m_matrix.const_cast_derived(), other.derived().nestedExpression());
+ eigen_assert(Mode == int(OtherDerived::Mode));
+ internal::call_assignment(derived().noalias(), other.derived());
}
/***************************************************************************
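
The Mode assertion above means triangular-to-triangular assignment is only defined between views of the same kind, e.g. (sketch):

    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d src = Eigen::Matrix3d::Random();
      Eigen::Matrix3d dst = Eigen::Matrix3d::Zero();
      // OK: both sides are Upper views, so Mode == OtherDerived::Mode holds.
      dst.triangularView<Eigen::Upper>() = src.triangularView<Eigen::Upper>();
      // dst.triangularView<Eigen::Lower>() = src.triangularView<Eigen::Upper>();
      // would fail the Mode assertion in debug builds.
      return 0;
    }
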
@@ -722,27 +537,6 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
evalToLazy(other.derived());
}
-/** Assigns a triangular or selfadjoint matrix to a dense matrix.
- * If the matrix is triangular, the opposite part is set to zero. */
-template<typename Derived>
-template<typename DenseDerived>
-void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
-{
- enum {
- unroll = DenseDerived::SizeAtCompileTime != Dynamic
- && internal::traits<Derived>::CoeffReadCost != Dynamic
- && DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2
- <= EIGEN_UNROLLING_LIMIT
- };
- other.derived().resize(this->rows(), this->cols());
-
- internal::triangular_assignment_selector
- <DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode,
- unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic,
- true // clear the opposite triangular part
- >::run(other.derived(), derived().nestedExpression());
-}
-
/***************************************************************************
* Implementation of TriangularView methods
***************************************************************************/
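
The evalToLazy specialization removed here is reintroduced below on top of the new assignment kernels; either way, evaluating a triangular view into a dense matrix copies the referenced triangle and zeroes the opposite part. Sketch:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Matrix3d m = Eigen::Matrix3d::Constant(1.0);
      Eigen::Matrix3d full = m.triangularView<Eigen::StrictlyUpper>();
      std::cout << full << std::endl;  // ones strictly above the diagonal, zeros elsewhere
      return 0;
    }
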
@@ -767,7 +561,7 @@ template<unsigned int Mode>
typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView()
{
- return derived();
+ return typename TriangularViewReturnType<Mode>::Type(derived());
}
/** This is the const version of MatrixBase::triangularView() */
@@ -776,7 +570,7 @@ template<unsigned int Mode>
typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView() const
{
- return derived();
+ return typename ConstTriangularViewReturnType<Mode>::Type(derived());
}
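
The views returned by these accessors feed the Product and solve paths introduced earlier in this file; typical user-side code (a sketch):

    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d a = Eigen::Matrix3d::Random();
      Eigen::Vector3d b = Eigen::Vector3d::Random();
      Eigen::Vector3d y = a.triangularView<Eigen::Lower>() * b;      // triangular product
      Eigen::Vector3d x = a.triangularView<Eigen::Lower>().solve(b); // forward substitution
      (void)y; (void)x;
      return 0;
    }
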
/** \returns true if *this is approximately equal to an upper triangular matrix,
@@ -831,6 +625,293 @@ bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
return true;
}
+
+/***************************************************************************
+****************************************************************************
+* Evaluators and Assignment of triangular expressions
+***************************************************************************
+***************************************************************************/
+
+namespace internal {
+
+
+// TODO currently a triangular expression has the form TriangularView<.,.>
+// in the future triangular-ness should be defined by the expression traits
+// such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<TriangularView<MatrixType,Mode> >
+{
+ typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+ typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
+
+ // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
+ // temporary; 0 if not.
+ static const int AssumeAliasing = 0;
+};
+
+template<typename MatrixType, unsigned int Mode>
+struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
+ : evaluator<typename internal::remove_all<MatrixType>::type>
+{
+ typedef TriangularView<MatrixType,Mode> XprType;
+ typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
+ typedef evaluator<XprType> type;
+ unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
+};
+
+// Additional assignment kinds:
+struct Triangular2Triangular {};
+struct Triangular2Dense {};
+struct Dense2Triangular {};
+
+
+template<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop;
+
+
+/** \internal Specialization of the dense assignment kernel for triangular matrices.
+ * The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.
+ * \tparam UpLo must be either Lower or Upper
+ * \tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint
+ */
+template<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
+class triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
+{
+protected:
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
+ typedef typename Base::DstXprType DstXprType;
+ typedef typename Base::SrcXprType SrcXprType;
+ using Base::m_dst;
+ using Base::m_src;
+ using Base::m_functor;
+public:
+
+ typedef typename Base::DstEvaluatorType DstEvaluatorType;
+ typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::Index Index;
+ typedef typename Base::AssignmentTraits AssignmentTraits;
+
+
+ EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+ : Base(dst, src, func, dstExpr)
+ {}
+
+#ifdef EIGEN_INTERNAL_DEBUGGING
+ EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
+ {
+ eigen_internal_assert(row!=col);
+ Base::assignCoeff(row,col);
+ }
+#else
+ using Base::assignCoeff;
+#endif
+
+ EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
+ {
+ if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
+ else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
+ else if(Mode==0) Base::assignCoeff(id,id);
+ }
+
+ EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)
+ {
+ eigen_internal_assert(row!=col);
+ if(SetOpposite)
+ m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));
+ }
+};
+
+template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+ typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+ DstEvaluatorType dstEvaluator(dst);
+ SrcEvaluatorType srcEvaluator(src);
+
+ typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
+ DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+
+ enum {
+ unroll = DstXprType::SizeAtCompileTime != Dynamic
+ && SrcEvaluatorType::CoeffReadCost != Dynamic
+ && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT
+ };
+
+ triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
+}
+
+template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
+EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+{
+ call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>());
+}
+
+template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
+template<> struct AssignmentKind<DenseShape,TriangularShape> { typedef Triangular2Dense Kind; };
+template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; };
+
+
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar>
+{
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+ eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode));
+
+ call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
+ }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar>
+{
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+ call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);
+ }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular, Scalar>
+{
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+ call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
+ }
+};
+
+
+template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
+struct triangular_assignment_loop
+{
+ // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+
+ enum {
+ col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
+ row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
+ };
+
+ typedef typename Kernel::Scalar Scalar;
+
+ EIGEN_DEVICE_FUNC
+ static inline void run(Kernel &kernel)
+ {
+ triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);
+
+ if(row==col)
+ kernel.assignDiagonalCoeff(row);
+ else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col) )
+ kernel.assignCoeff(row,col);
+ else if(SetOpposite)
+ kernel.assignOppositeCoeff(row,col);
+ }
+};
+
+// prevent buggy user code from causing an infinite recursion
+template<typename Kernel, unsigned int Mode, bool SetOpposite>
+struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(Kernel &) {}
+};
+
+
+
+// TODO: experiment with a recursive assignment procedure splitting the current
+// triangular part into one rectangular and two triangular parts.
+
+
+template<typename Kernel, unsigned int Mode, bool SetOpposite>
+struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
+{
+ typedef typename Kernel::Index Index;
+ typedef typename Kernel::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static inline void run(Kernel &kernel)
+ {
+ for(Index j = 0; j < kernel.cols(); ++j)
+ {
+ Index maxi = (std::min)(j, kernel.rows());
+ Index i = 0;
+ if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
+ {
+ for(; i < maxi; ++i)
+ if(Mode&Upper) kernel.assignCoeff(i, j);
+ else kernel.assignOppositeCoeff(i, j);
+ }
+ else
+ i = maxi;
+
+ if(i<kernel.rows()) // then i==j
+ kernel.assignDiagonalCoeff(i++);
+
+ if (((Mode&Upper) && SetOpposite) || (Mode&Lower))
+ {
+ for(; i < kernel.rows(); ++i)
+ if(Mode&Lower) kernel.assignCoeff(i, j);
+ else kernel.assignOppositeCoeff(i, j);
+ }
+ }
+ }
+};
+
+} // end namespace internal
+
+/** Assigns a triangular or selfadjoint matrix to a dense matrix.
+ * If the matrix is triangular, the opposite part is set to zero. */
+template<typename Derived>
+template<typename DenseDerived>
+void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
+{
+ other.derived().resize(this->rows(), this->cols());
+ internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
+}
+
+namespace internal {
+
+// Triangular = Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Triangular, Scalar>
+{
+ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst.setZero();
+ dst._assignProduct(src, 1);
+ }
+};
+
+// Triangular += Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Triangular, Scalar>
+{
+ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
+ {
+ dst._assignProduct(src, 1);
+ }
+};
+
+// Triangular -= Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Triangular, Scalar>
+{
+ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
+ {
+ dst._assignProduct(src, -1);
+ }
+};
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_TRIANGULARMATRIX_H
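
The three Assignment specializations at the end of the file map =, += and -= of a product expression onto _assignProduct with alpha 1 or -1, so only the destination triangle of the product is computed. In user code that enables (sketch):

    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d a = Eigen::Matrix3d::Random();
      Eigen::Matrix3d b = Eigen::Matrix3d::Random();
      Eigen::Matrix3d c = Eigen::Matrix3d::Zero();
      c.triangularView<Eigen::Lower>() = a * b;   // setZero(), then alpha = 1
      c.triangularView<Eigen::Lower>() += a * b;  // accumulate with alpha = 1
      c.triangularView<Eigen::Lower>() -= a * b;  // accumulate with alpha = -1
      return 0;
    }
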
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 52eb4f604..a626310ec 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -48,25 +48,15 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
- Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
- Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
+ Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
};
- #if EIGEN_GNUC_AT_LEAST(3,4)
- typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
- #else
- typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
- #endif
- enum {
- CoeffReadCost = TraversalSize==Dynamic ? Dynamic
- : TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
- };
};
}
template< typename MatrixType, typename MemberOp, int Direction>
-class PartialReduxExpr : internal::no_assignment_operator,
- public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type
+class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
+ internal::no_assignment_operator
{
public:
@@ -75,7 +65,7 @@ class PartialReduxExpr : internal::no_assignment_operator,
typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
- PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
+ explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
: m_matrix(mat), m_functor(func) {}
Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
@@ -138,7 +128,7 @@ struct member_redux {
>::type result_type;
template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
- member_redux(const BinaryOp func) : m_functor(func) {}
+ explicit member_redux(const BinaryOp func) : m_functor(func) {}
template<typename Derived>
inline result_type operator()(const DenseBase<Derived>& mat) const
{ return mat.redux(m_functor); }
@@ -175,10 +165,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
template<template<typename _Scalar> class Functor,
- typename Scalar=typename internal::traits<ExpressionType>::Scalar> struct ReturnType
+ typename Scalar_=Scalar> struct ReturnType
{
typedef PartialReduxExpr<ExpressionType,
- Functor<Scalar>,
+ Functor<Scalar_>,
Direction
> Type;
};
@@ -186,7 +176,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
template<typename BinaryOp> struct ReduxReturnType
{
typedef PartialReduxExpr<ExpressionType,
- internal::member_redux<BinaryOp,typename internal::traits<ExpressionType>::Scalar>,
+ internal::member_redux<BinaryOp,Scalar>,
Direction
> Type;
};
@@ -259,7 +249,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
public:
- inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
+ explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */
inline const ExpressionType& _expression() const { return m_matrix; }
@@ -274,7 +264,22 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
template<typename BinaryOp>
const typename ReduxReturnType<BinaryOp>::Type
redux(const BinaryOp& func = BinaryOp()) const
- { return typename ReduxReturnType<BinaryOp>::Type(_expression(), func); }
+ { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
+
+ typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
+ typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
+ typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
+ typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
+ typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
+ typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
+ typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
+ typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
+ typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
+ typedef typename ReturnType<internal::member_all>::Type AllReturnType;
+ typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
+ typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
+ typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
+ typedef Reverse<ExpressionType, Direction> ReverseReturnType;
/** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression.
@@ -285,8 +290,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_minCoeff.out
*
* \sa DenseBase::minCoeff() */
- const typename ReturnType<internal::member_minCoeff>::Type minCoeff() const
- { return _expression(); }
+ const MinCoeffReturnType minCoeff() const
+ { return MinCoeffReturnType(_expression()); }
/** \returns a row (or column) vector expression of the largest coefficient
* of each column (or row) of the referenced expression.
@@ -297,8 +302,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_maxCoeff.out
*
* \sa DenseBase::maxCoeff() */
- const typename ReturnType<internal::member_maxCoeff>::Type maxCoeff() const
- { return _expression(); }
+ const MaxCoeffReturnType maxCoeff() const
+ { return MaxCoeffReturnType(_expression()); }
/** \returns a row (or column) vector expression of the squared norm
* of each column (or row) of the referenced expression.
@@ -308,8 +313,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_squaredNorm.out
*
* \sa DenseBase::squaredNorm() */
- const typename ReturnType<internal::member_squaredNorm,RealScalar>::Type squaredNorm() const
- { return _expression(); }
+ const SquaredNormReturnType squaredNorm() const
+ { return SquaredNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression.
@@ -319,8 +324,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_norm.out
*
* \sa DenseBase::norm() */
- const typename ReturnType<internal::member_norm,RealScalar>::Type norm() const
- { return _expression(); }
+ const NormReturnType norm() const
+ { return NormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm
@@ -329,8 +334,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::blueNorm() */
- const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const
- { return _expression(); }
+ const BlueNormReturnType blueNorm() const
+ { return BlueNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm
@@ -339,8 +344,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::stableNorm() */
- const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const
- { return _expression(); }
+ const StableNormReturnType stableNorm() const
+ { return StableNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm
@@ -349,8 +354,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* This is a vector with real entries, even if the original matrix has complex entries.
*
* \sa DenseBase::hypotNorm() */
- const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const
- { return _expression(); }
+ const HypotNormReturnType hypotNorm() const
+ { return HypotNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the sum
* of each column (or row) of the referenced expression.
@@ -359,31 +364,31 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_sum.out
*
* \sa DenseBase::sum() */
- const typename ReturnType<internal::member_sum>::Type sum() const
- { return _expression(); }
+ const SumReturnType sum() const
+ { return SumReturnType(_expression()); }
/** \returns a row (or column) vector expression of the mean
* of each column (or row) of the referenced expression.
*
* \sa DenseBase::mean() */
- const typename ReturnType<internal::member_mean>::Type mean() const
- { return _expression(); }
+ const MeanReturnType mean() const
+ { return MeanReturnType(_expression()); }
/** \returns a row (or column) vector expression representing
* whether \b all coefficients of each respective column (or row) are \c true.
* This expression can be assigned to a vector with entries of type \c bool.
*
* \sa DenseBase::all() */
- const typename ReturnType<internal::member_all>::Type all() const
- { return _expression(); }
+ const AllReturnType all() const
+ { return AllReturnType(_expression()); }
/** \returns a row (or column) vector expression representing
* whether \b at \b least one coefficient of each respective column (or row) is \c true.
* This expression can be assigned to a vector with entries of type \c bool.
*
* \sa DenseBase::any() */
- const typename ReturnType<internal::member_any>::Type any() const
- { return _expression(); }
+ const AnyReturnType any() const
+ { return AnyReturnType(_expression()); }
/** \returns a row (or column) vector expression representing
* the number of \c true coefficients of each respective column (or row).
@@ -394,8 +399,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_count.out
*
* \sa DenseBase::count() */
- const PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> count() const
- { return _expression(); }
+ const CountReturnType count() const
+ { return CountReturnType(_expression()); }
/** \returns a row (or column) vector expression of the product
* of each column (or row) of the referenced expression.
@@ -404,8 +409,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_prod.out
*
* \sa DenseBase::prod() */
- const typename ReturnType<internal::member_prod>::Type prod() const
- { return _expression(); }
+ const ProdReturnType prod() const
+ { return ProdReturnType(_expression()); }
/** \returns a matrix expression
@@ -415,8 +420,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude Vectorwise_reverse.out
*
* \sa DenseBase::reverse() */
- const Reverse<ExpressionType, Direction> reverse() const
- { return Reverse<ExpressionType, Direction>( _expression() ); }
+ const ReverseReturnType reverse() const
+ { return ReverseReturnType( _expression() ); }
typedef Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> ReplicateReturnType;
const ReplicateReturnType replicate(Index factor) const;
@@ -560,7 +565,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
- Homogeneous<ExpressionType,Direction> homogeneous() const;
+ typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+ HomogeneousReturnType homogeneous() const;
typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived>
@@ -605,7 +611,7 @@ template<typename Derived>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
DenseBase<Derived>::colwise() const
{
- return derived();
+ return ConstColwiseReturnType(derived());
}
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
@@ -616,7 +622,7 @@ template<typename Derived>
inline typename DenseBase<Derived>::ColwiseReturnType
DenseBase<Derived>::colwise()
{
- return derived();
+ return ColwiseReturnType(derived());
}
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
@@ -630,7 +636,7 @@ template<typename Derived>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
DenseBase<Derived>::rowwise() const
{
- return derived();
+ return ConstRowwiseReturnType(derived());
}
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
@@ -641,7 +647,7 @@ template<typename Derived>
inline typename DenseBase<Derived>::RowwiseReturnType
DenseBase<Derived>::rowwise()
{
- return derived();
+ return RowwiseReturnType(derived());
}
} // end namespace Eigen
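
The net effect of the VectorwiseOp changes is to name the return types and construct them explicitly; the partial-reduction API itself is unchanged. For orientation (sketch):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd m(2, 3);
      m << 1, 2, 3,
           4, 5, 6;
      std::cout << m.colwise().sum()      << std::endl;  // 1x3 row vector: 5 7 9
      std::cout << m.rowwise().maxCoeff() << std::endl;  // 2x1 column vector: 3 6
      return 0;
    }
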
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index 6f4b9ec35..02bd4eff3 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -53,6 +53,33 @@ struct visitor_impl<Visitor, Derived, Dynamic>
}
};
+// evaluator adaptor
+template<typename XprType>
+class visitor_evaluator
+{
+public:
+ explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+ enum {
+ RowsAtCompileTime = XprType::RowsAtCompileTime,
+ CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
+ };
+
+ Index rows() const { return m_xpr.rows(); }
+ Index cols() const { return m_xpr.cols(); }
+ Index size() const { return m_xpr.size(); }
+
+ CoeffReturnType coeff(Index row, Index col) const
+ { return m_evaluator.coeff(row, col); }
+
+protected:
+ typename internal::evaluator<XprType>::nestedType m_evaluator;
+ const XprType &m_xpr;
+};
} // end namespace internal
/** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
@@ -76,14 +103,17 @@ template<typename Derived>
template<typename Visitor>
void DenseBase<Derived>::visit(Visitor& visitor) const
{
- enum { unroll = SizeAtCompileTime != Dynamic
- && CoeffReadCost != Dynamic
- && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
- && SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
- <= EIGEN_UNROLLING_LIMIT };
- return internal::visitor_impl<Visitor, Derived,
+ typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
+ ThisEvaluator thisEval(derived());
+
+ enum { unroll = SizeAtCompileTime != Dynamic
+ && ThisEvaluator::CoeffReadCost != Dynamic
+ && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
+ && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
+ <= EIGEN_UNROLLING_LIMIT };
+ return internal::visitor_impl<Visitor, ThisEvaluator,
unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived(), visitor);
+ >::run(thisEval, visitor);
}
namespace internal {
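
visit() now iterates through the visitor_evaluator wrapper, but the visitor protocol it expects is unchanged: one init(value,0,0) call for the first coefficient, then operator()(value,i,j) for the rest, column by column in the default storage order. A sketch with a hypothetical visitor (MaxAbsVisitor is not an Eigen type):

    #include <Eigen/Dense>
    #include <cmath>
    #include <iostream>

    // Hypothetical visitor tracking the location of the largest absolute value.
    struct MaxAbsVisitor {
      double best;
      Eigen::DenseIndex row, col;
      void init(double value, Eigen::DenseIndex i, Eigen::DenseIndex j) {
        best = std::abs(value); row = i; col = j;  // seeded with coefficient (0,0)
      }
      void operator()(double value, Eigen::DenseIndex i, Eigen::DenseIndex j) {
        if (std::abs(value) > best) { best = std::abs(value); row = i; col = j; }
      }
    };

    int main() {
      Eigen::Matrix2d m;
      m << 0.5, -2.0,
           0.1,  0.2;
      MaxAbsVisitor v;
      m.visit(v);
      std::cout << v.best << " at (" << v.row << "," << v.col << ")" << std::endl;  // 2 at (0,1)
      return 0;
    }
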
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 1591458a7..e66d50649 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -22,9 +22,9 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
-#ifdef EIGEN_VECTORIZE_FMA
-#ifndef EIGEN_HAS_FUSED_MADD
-#define EIGEN_HAS_FUSED_MADD 1
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#endif
@@ -137,7 +137,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if defined(__clang__) || defined(__GNUC__)
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
// and gcc stupidly generates a vfmadd132ps instruction,
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
@@ -150,7 +150,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if defined(__clang__) || defined(__GNUC__)
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
// see above
Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
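
Orthogonal to the compiler-detection cleanup here: what a fused madd buys numerically is a single rounding for a*b+c. A scalar illustration via std::fma (assuming the compiler does not contract the unfused expression on its own):

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.0 + std::ldexp(1.0, -52);  // 1 + 2^-52
      double b = 1.0 - std::ldexp(1.0, -53);  // 1 - 2^-53
      // Exact product: 1 + 2^-53 - 2^-105, which rounds to exactly 1.0 when
      // a*b is computed as a separate double, so the unfused residual vanishes.
      std::printf("unfused: %g\n", a * b - 1.0);           // 0
      std::printf("fused:   %g\n", std::fma(a, b, -1.0));  // ~1.11e-16
      return 0;
    }
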
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 13b874d0c..f9b93a42b 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -7,23 +7,21 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_COMPLEX_ALTIVEC_H
-#define EIGEN_COMPLEX_ALTIVEC_H
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
namespace Eigen {
namespace internal {
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
-static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
-static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
-static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
-static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
-static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
-static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
-static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
-static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
-static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
+#ifdef _BIG_ENDIAN
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+#else
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+#endif
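
The CONJ_XOR masks above carry a single IEEE-754 sign bit per 64-bit lane; XOR-ing a packet with such a mask negates exactly one component, which is all complex conjugation needs. A scalar sketch of the idea (plain C++, no AltiVec):

#include <complex>
#include <cstdint>
#include <cstring>
#include <iostream>

// Flip the sign bit of the imaginary part with an XOR: equivalent to conj().
std::complex<double> conj_via_xor(std::complex<double> z) {
  double im = z.imag();
  std::uint64_t bits;
  std::memcpy(&bits, &im, sizeof bits);
  bits ^= 0x8000000000000000ull; // the sign-bit mask the packet code stores
  std::memcpy(&im, &bits, sizeof im);
  return std::complex<double>(z.real(), im);
}

int main() {
  std::cout << conj_via_xor(std::complex<double>(1.0, 2.0)) << '\n'; // (1,-2)
}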
//---------- float ----------
struct Packet2cf
@@ -65,7 +63,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
res.v = pload<Packet4f>((const float *)&from);
else
res.v = ploadu<Packet4f>((const float *)&from);
- res.v = vec_perm(res.v, res.v, p16uc_PSET_HI);
+ res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
return res;
}
@@ -95,16 +93,16 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
Packet4f v1, v2;
// Permute and multiply the real parts of a and b
- v1 = vec_perm(a.v, a.v, p16uc_COMPLEX_RE);
+ v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
// Get the imaginary parts of a
- v2 = vec_perm(a.v, a.v, p16uc_COMPLEX_IM);
+ v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
// multiply a_re * b
v1 = vec_madd(v1, b.v, p4f_ZERO);
// multiply a_im * b and get the conjugate result
v2 = vec_madd(v2, b.v, p4f_ZERO);
v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
// permute back to a proper order
- v2 = vec_perm(v2, v2, p16uc_COMPLEX_REV);
+ v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
return Packet2cf(vec_add(v1, v2));
}
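
For readers unfamiliar with this decomposition: the packet code above evaluates (ar+i*ai)(br+i*bi) as ar*(br,bi) plus a sign-flipped, lane-swapped ai*(br,bi). A scalar rendition, with the CONJ_XOR and vec_perm steps folded into signs and ordering (illustrative only):

#include <complex>
#include <iostream>

std::complex<float> pmul_scalar(std::complex<float> a, std::complex<float> b) {
  // v1 = a_re * (b_re, b_im)
  float v1_re = a.real() * b.real(), v1_im = a.real() * b.imag();
  // v2 = a_im * (b_re, b_im), with the CONJ_XOR sign flip and the
  // COMPLEX32_REV lane swap already applied
  float v2_re = -(a.imag() * b.imag()), v2_im = a.imag() * b.real();
  return std::complex<float>(v1_re + v2_re, v1_im + v2_im);
}

int main() {
  std::cout << pmul_scalar({1, 2}, {3, 4}) << '\n'; // (-5,10)
}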
@@ -138,7 +136,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Pack
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
{
Packet4f rev_a;
- rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX_REV2);
+ rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
return Packet2cf(rev_a);
}
@@ -153,9 +151,13 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
{
Packet4f b1, b2;
-
+#ifdef _BIG_ENDIAN
b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
+#else
+ b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
+ b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
+#endif
b2 = (Packet4f) vec_sld(b2, b2, 8);
b2 = padd(b1, b2);
@@ -179,7 +181,11 @@ struct palign_impl<Offset,Packet2cf>
{
if (Offset==1)
{
+#ifdef _BIG_ENDIAN
first.v = vec_sld(first.v, second.v, 8);
+#else
+ first.v = vec_sld(second.v, first.v, 8);
+#endif
}
}
};
@@ -222,23 +228,203 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
// TODO optimize it for AltiVec
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
- return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV))));
+ return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
}
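
The division just above follows the textbook recipe a/b = a*conj(b)/|b|^2, with vec_add(s, vec_perm(s, ...)) broadcasting |b|^2 = br^2 + bi^2 into both lanes. A scalar equivalent:

#include <complex>
#include <iostream>

std::complex<float> pdiv_scalar(std::complex<float> a, std::complex<float> b) {
  std::complex<float> num = a * std::conj(b);                // conj_helper pmul step
  float denom = b.real() * b.real() + b.imag() * b.imag();   // |b|^2, broadcast
  return std::complex<float>(num.real() / denom, num.imag() / denom);
}

int main() {
  std::cout << pdiv_scalar({-5, 10}, {3, 4}) << '\n'; // (1,2)
}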
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
{
- return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
+ return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
}
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
{
- Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
- kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
+ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
kernel.packet[0].v = tmp;
}
+//---------- double ----------
+#ifdef __VSX__
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, DenseIndex stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet1cd>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, DenseIndex stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<double> >(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d a_re, a_im, v1, v2;
+
+ // Permute and multiply the real parts of a and b
+ a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+ // Get the imaginary parts of a
+ a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+ // multiply a_re * b
+ v1 = vec_madd(a_re, b.v, p2d_ZERO);
+ // multiply a_im * b and get the conjugate result
+ v2 = vec_madd(a_im, b.v, p2d_ZERO);
+ v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
+ v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
+
+ return Packet1cd(vec_add(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
+{
+ return pset1<Packet1cd>(*from);
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res[2];
+ pstore<std::complex<double> >(res, a);
+
+ return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+ {
+    // FIXME is it certain we never have to align a Packet1cd?
+    // Even though a std::complex<double> is 16 bytes, it is not necessarily aligned on a 16-byte boundary...
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for AltiVec
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
+ return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+#endif // __VSX__
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN_COMPLEX_ALTIVEC_H
+#endif // EIGEN_COMPLEX32_ALTIVEC_H
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index b43e8ace3..6b68fc7a5 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -18,17 +18,17 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
-#ifndef EIGEN_HAS_FUSED_MADD
-#define EIGEN_HAS_FUSED_MADD 1
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
-#ifndef EIGEN_HAS_FUSE_CJMADD
-#define EIGEN_HAS_FUSE_CJMADD 1
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
-#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
typedef __vector float Packet4f;
@@ -50,22 +50,20 @@ typedef __vector unsigned char Packet16uc;
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
Packet4f p4f_##NAME = pset1<Packet4f>(X)
-#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
- Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
-
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+
#define DST_CHAN 1
#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
-// Define global static constants:
-static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
-static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
-static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
-static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}
-static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
+// These constants are endian-agnostic
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
@@ -74,6 +72,50 @@ static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+
+// Mask alignment
+#ifdef __PPC64__
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+#else
+#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
+#endif
+
+#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
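
_EIGEN_ALIGNED_PTR and the & 15 offsets used with vec_vsx_ld/vec_vsx_st below split an arbitrary address into a 16-byte-aligned base plus a small byte offset. The masking idiom in isolation (assuming, as the macro itself does, that ptrdiff_t can hold a pointer):

#include <cstddef>
#include <cstdio>

int main() {
  alignas(16) unsigned char buf[32];
  unsigned char* p = buf + 5;                          // deliberately misaligned
  std::ptrdiff_t addr    = (std::ptrdiff_t)p;
  std::ptrdiff_t aligned = addr & ~(std::ptrdiff_t)15; // _EIGEN_ALIGNED_PTR(p)
  std::ptrdiff_t offset  = addr & 15;                  // what vec_vsx_ld takes
  std::printf("offset in 16-byte block: %td\n", offset);            // 5
  std::printf("base 16-byte aligned: %d\n", int(aligned % 16 == 0)); // 1
  return 0;
}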
+
+// Handle endianness properly while loading constants
+// Define global static constants:
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+#else
+static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+#endif // _BIG_ENDIAN
+
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
+
+static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
+
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#else
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#endif // _BIG_ENDIAN
+
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
@@ -105,9 +147,22 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
+
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
-/*
+
+inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
+{
+ union {
+ Packet16uc v;
+ unsigned char n[16];
+ } vt;
+ vt.v = v;
+ for (int i=0; i< 16; i++)
+ s << (int)vt.n[i] << ", ";
+ return s;
+}
+
inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
{
union {
@@ -140,7 +195,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
return s;
}
-
+/*
inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
{
union {
@@ -150,14 +205,21 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
vt.v = v;
s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
return s;
-}
-*/
+}*/
+
+
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
float EIGEN_ALIGN16 af[4];
af[0] = from;
- Packet4f vc = vec_ld(0, af);
+ Packet4f vc = pload<Packet4f>(af);
vc = vec_splat(vc, 0);
return vc;
}
@@ -165,17 +227,15 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
int EIGEN_ALIGN16 ai[4];
ai[0] = from;
- Packet4i vc = vec_ld(0, ai);
+ Packet4i vc = pload<Packet4i>(ai);
vc = vec_splat(vc, 0);
return vc;
}
-
-
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
{
- a3 = vec_ld(0,a);
+ a3 = pload<Packet4f>(a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
@@ -185,7 +245,7 @@ template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4i>(const int *a,
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
{
- a3 = vec_ld(0,a);
+ a3 = pload<Packet4i>(a);
a0 = vec_splat(a3, 0);
a1 = vec_splat(a3, 1);
a2 = vec_splat(a3, 2);
@@ -199,7 +259,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa
af[1] = from[1*stride];
af[2] = from[2*stride];
af[3] = from[3*stride];
- return vec_ld(0, af);
+ return pload<Packet4f>(af);
}
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride)
{
@@ -208,12 +268,12 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f
ai[1] = from[1*stride];
ai[2] = from[2*stride];
ai[3] = from[3*stride];
- return vec_ld(0, ai);
+ return pload<Packet4i>(ai);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride)
{
float EIGEN_ALIGN16 af[4];
- vec_st(from, 0, af);
+ pstore<float>(af, from);
to[0*stride] = af[0];
to[1*stride] = af[1];
to[2*stride] = af[2];
@@ -222,7 +282,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride)
{
int EIGEN_ALIGN16 ai[4];
- vec_st(from, 0, ai);
+ pstore<int>((int *)ai, from);
to[0*stride] = ai[0];
to[1*stride] = ai[1];
to[2*stride] = ai[2];
@@ -283,7 +343,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const
*/
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
- Packet4f t, y_0, y_1, res;
+#ifndef __VSX__ // VSX actually provides a div instruction
+ Packet4f t, y_0, y_1;
// Altivec does not offer a divide instruction, so we have to do a reciprocal approximation
y_0 = vec_re(b);
@@ -292,8 +353,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const
t = vec_nmsub(y_0, b, p4f_ONE);
y_1 = vec_madd(y_0, t, y_0);
- res = vec_madd(a, y_1, p4f_ZERO);
- return res;
+ return vec_madd(a, y_1, p4f_ZERO);
+#else
+ return vec_div(a, b);
+#endif
}
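
The non-VSX branch above is a classic Newton-Raphson reciprocal: from a rough estimate y0 of 1/b, one step y1 = y0 + y0*(1 - b*y0) roughly doubles the number of correct bits, and a/b is then a*y1. A scalar sketch with a deliberately crude starting estimate:

#include <cstdio>

int main() {
  float b  = 3.0f;
  float y0 = 0.3f;          // stand-in for vec_re(b)'s rough reciprocal
  float t  = 1.0f - b * y0; // vec_nmsub(y_0, b, p4f_ONE)
  float y1 = y0 + y0 * t;   // vec_madd(y_0, t, y_0)
  std::printf("y0 = %.7f  y1 = %.7f  1/b = %.7f\n", y0, y1, 1.0f / b);
  std::printf("a/b ~= %.7f (for a = 2)\n", 2.0f * y1); // vec_madd(a, y_1, ZERO)
  return 0;
}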
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
@@ -311,7 +374,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
-// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
@@ -324,13 +386,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
-template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
-template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
-
+#ifdef _BIG_ENDIAN
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
- // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
Packet16uc MSQ, LSQ;
Packet16uc mask;
MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
@@ -350,25 +409,36 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
mask = vec_lvsl(0, from); // create the permute mask
return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data
}
+#else
+// We also need to redefine little endian loading of Packet4i/Packet4f using VSX
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
+}
+#endif
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
Packet4f p;
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
else p = ploadu<Packet4f>(from);
- return vec_perm(p, p, p16uc_DUPLICATE);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
}
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
{
Packet4i p;
if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
else p = ploadu<Packet4i>(from);
- return vec_perm(p, p, p16uc_DUPLICATE);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
}
-template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
-template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
-
+#ifdef _BIG_ENDIAN
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
@@ -405,15 +475,30 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& f
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part
}
+#else
+// We also need to redefine little endian storing of Packet4i/Packet4f using VSX
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to));
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+#endif
+#ifndef __VSX__
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
+#endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
-template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
-template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); }
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
@@ -460,7 +545,11 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i sum;
sum = vec_sums(a, p4i_ZERO);
+#ifdef _BIG_ENDIAN
sum = vec_sld(sum, p4i_ZERO, 12);
+#else
+ sum = vec_sld(p4i_ZERO, sum, 4);
+#endif
return pfirst(sum);
}
@@ -547,8 +636,25 @@ struct palign_impl<Offset,Packet4f>
{
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
{
- if (Offset!=0)
- first = vec_sld(first, second, Offset*4);
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
}
};
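
Endianness plumbing aside, both switch blocks above implement the same contract: palign replaces first with elements [Offset, Offset+4) of the eight-element concatenation first|second. A scalar reference for that contract:

#include <cstdio>

// What palign_impl computes, stripped of vec_sld and endianness handling.
void palign_ref(float first[4], const float second[4], int offset) {
  float cat[8];
  for (int i = 0; i < 4; ++i) { cat[i] = first[i]; cat[4 + i] = second[i]; }
  for (int i = 0; i < 4; ++i) first[i] = cat[offset + i];
}

int main() {
  float a[4] = {0, 1, 2, 3}, b[4] = {4, 5, 6, 7};
  palign_ref(a, b, 1);
  std::printf("%g %g %g %g\n", a[0], a[1], a[2], a[3]); // 1 2 3 4
  return 0;
}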
@@ -557,8 +663,25 @@ struct palign_impl<Offset,Packet4i>
{
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
{
- if (Offset!=0)
- first = vec_sld(first, second, Offset*4);
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
}
};
@@ -588,6 +711,222 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
kernel.packet[3] = vec_mergel(t1, t3);
}
+
+//---------- double ----------
+#ifdef __VSX__
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
+
+static Packet2l p2l_ZERO = (Packet2l) p4i_ZERO;
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO = (Packet2d) p4f_ZERO;
+static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+
+#ifdef _BIG_ENDIAN
+static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ZERO, (Packet16uc) p2d_ONE, 8);
+#else
+static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ONE, (Packet16uc) p2d_ZERO, 8);
+#endif
+
+static EIGEN_STRONG_INLINE Packet2d vec_splat_dbl(Packet2d& a, int index)
+{
+ switch (index) {
+ case 0:
+ return (Packet2d) vec_perm(a, a, p16uc_PSET64_HI);
+ case 1:
+ return (Packet2d) vec_perm(a, a, p16uc_PSET64_LO);
+ }
+ return a;
+}
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+
+ HasDiv = 1,
+ HasExp = 0,
+ HasSqrt = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ union {
+ Packet2d v;
+ double n[2];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1];
+ return s;
+}
+
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d) vec_ld(0, (const float *) from); } //FIXME
+
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st((Packet4f)from, 0, (float *)to); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from;
+ Packet2d vc = pload<Packet2d>(af);
+ vc = vec_splat_dbl(vc, 0);
+ return vc;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat_dbl(a1, 0);
+ a1 = vec_splat_dbl(a1, 1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat_dbl(a3, 0);
+ a3 = vec_splat_dbl(a3, 1);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
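
The Packet2d gather/scatter above stages strided elements through a small aligned buffer so the actual packet load/store stays contiguous. The same pattern in plain scalar C++ (helper names are illustrative, not Eigen's):

#include <cstdio>

void gather2(const double* from, long stride, double out[2]) {
  out[0] = from[0 * stride]; // af[0] = from[0*stride];
  out[1] = from[1 * stride]; // af[1] = from[1*stride];
}

void scatter2(double* to, long stride, const double in[2]) {
  to[0 * stride] = in[0];
  to[1 * stride] = in[1];
}

int main() {
  double src[4] = {1, 2, 3, 4}, packed[2];
  gather2(src, 2, packed);                      // picks src[0] and src[2]
  std::printf("%g %g\n", packed[0], packed[1]); // 1 3
  return 0;
}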
+template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); }
+
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_sub(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub<Packet2d>(p2d_ZERO, a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
+
+// for some weird reasons, it has to be overloaded for packets of integers
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet2d) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ Packet2d p;
+ if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
+ else p = ploadu<Packet2d>(from);
+ return vec_perm(p, p, p16uc_PSET64_HI);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { vec_dstt((const float *) addr, DST_CTRL(2,2,32), DST_CHAN); }
+
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return (Packet2d)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE64); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ Packet2d b, sum;
+ b = (Packet2d) vec_sld((Packet4ui) a, (Packet4ui)a, 8);
+ sum = vec_add(a, b);
+ return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ Packet2d v[2], sum;
+ v[0] = vec_add(vecs[0], (Packet2d) vec_sld((Packet4ui) vecs[0], (Packet4ui) vecs[0], 8));
+ v[1] = vec_add(vecs[1], (Packet2d) vec_sld((Packet4ui) vecs[1], (Packet4ui) vecs[1], 8));
+
+#ifdef _BIG_ENDIAN
+ sum = (Packet2d) vec_sld((Packet4ui) v[0], (Packet4ui) v[1], 8);
+#else
+ sum = (Packet2d) vec_sld((Packet4ui) v[1], (Packet4ui) v[0], 8);
+#endif
+
+ return sum;
+}
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmul(a, (Packet2d)vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
+}
+
+// min
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst(vec_min(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
+}
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst(vec_max(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8)));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+#ifdef _BIG_ENDIAN
+ first = (Packet2d) vec_sld((Packet4ui) first, (Packet4ui) second, 8);
+#else
+ first = (Packet2d) vec_sld((Packet4ui) second, (Packet4ui) first, 8);
+#endif
+ }
+};
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ Packet2d t0, t1;
+ t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+ t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+ kernel.packet[0] = t0;
+ kernel.packet[1] = t1;
+}
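
For a packet of two doubles, the TRANSPOSE64_HI/LO permutations amount to an ordinary 2x2 matrix transpose: row 0 collects the first lane of each packet, row 1 the second. Reference semantics:

#include <cstdio>

int main() {
  double row0[2] = {1, 2};
  double row1[2] = {3, 4};
  double t0[2]   = {row0[0], row1[0]}; // the TRANSPOSE64_HI permutation
  double t1[2]   = {row0[1], row1[1]}; // the TRANSPOSE64_LO permutation
  std::printf("%g %g / %g %g\n", t0[0], t0[1], t1[0], t1[1]); // 1 3 / 2 4
  return 0;
}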
+
+#endif // __VSX__
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 42e7733d7..0fdcb0741 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -33,6 +33,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
+ HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -88,7 +89,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
- return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
}
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
@@ -252,7 +253,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
- // TODO optimize it for AltiVec
+ // TODO optimize it for NEON
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet4f s, rev_s;
@@ -265,11 +266,198 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
- float32x4_t tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
+ Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
kernel.packet[1].v = tmp;
}
+//---------- double ----------
+#if EIGEN_ARCH_ARM64
+
+static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
+
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d v1, v2;
+
+ // Get the real values of a
+ v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
+  // Get the imaginary values of a
+ v2 = vdupq_lane_f64(vget_high_f64(a.v), 1);
+ // Multiply the real a with b
+ v1 = vmulq_f64(v1, b.v);
+ // Multiply the imag a with b
+ v2 = vmulq_f64(v2, b.v);
+ // Conjugate v2
+ v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
+ // Swap real/imag elements in v2.
+ v2 = preverse<Packet2d>(v2);
+ // Add and return the result
+ return Packet1cd(vaddq_f64(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, DenseIndex stride)
+{
+ Packet2d res;
+ res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
+ return Packet1cd(res);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, DenseIndex stride)
+{
+ to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
+}
+
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res;
+ pstore<std::complex<double> >(&res, a);
+
+ return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+ {
+    // FIXME is it certain we never have to align a Packet1cd?
+    // Even though a std::complex<double> is 16 bytes, it is not necessarily aligned on a 16-byte boundary...
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ // TODO optimize it for NEON
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = pmul<Packet2d>(b.v, b.v);
+ Packet2d rev_s = preverse<Packet2d>(s);
+
+ return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+#endif // EIGEN_ARCH_ARM64
} // end namespace internal
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 0504c095c..9afd86bec 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -20,14 +20,24 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
// FIXME NEON has 16 quad registers, but since the current register allocator
// is so bad, it is much better to reduce it to 8
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
-#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
+typedef float32x2_t Packet2f;
typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i;
+typedef int32x2_t Packet2i;
typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
@@ -39,7 +49,7 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
-#if defined(__llvm__) && !defined(__clang__)
+#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG
//Special treatment for Apple's llvm-gcc, its NEON packet types are unions
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
@@ -52,11 +62,11 @@ typedef uint32x4_t Packet4ui;
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
// which is available on LLVM and GCC (at least)
-#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
#elif defined __pld
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
-#elif !defined(__aarch64__)
+#elif !EIGEN_ARCH_ARM64
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
#else
// by default no explicit prefetching
@@ -66,11 +76,12 @@ typedef uint32x4_t Packet4ui;
template<> struct packet_traits<float> : default_packet_traits
{
typedef Packet4f type;
- typedef Packet4f half;
+ typedef Packet2f half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
+ HasHalfPacket=1,
HasDiv = 1,
// FIXME check the Has*
@@ -84,16 +95,17 @@ template<> struct packet_traits<float> : default_packet_traits
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet4i type;
- typedef Packet4i half;
+ typedef Packet2i half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
- size=4
+ size=4,
+ HasHalfPacket=1
// FIXME check the Has*
};
};
-#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
+#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
@@ -136,6 +148,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
+#if EIGEN_ARCH_ARM64
+ return vdivq_f32(a,b);
+#else
Packet4f inv, restep, div;
// NEON does not offer a divide instruction, so we have to do a reciprocal approximation
@@ -154,14 +169,27 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const
div = vmulq_f32(a, inv);
return div;
+#endif
}
+
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ eigen_assert(false && "packet integer division are not supported by NEON");
return pset1<Packet4i>(0);
}
-// for some weird raisons, it has to be overloaded for packet of integers
+#ifdef __ARM_FEATURE_FMA
+// See bug 936.
+// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
+// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
+// MLA is not fused i.e. does 2 roundings.
+// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4):
+// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
+#else
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); }
+#endif
+
+// No FMA instruction for int, so use MLA unconditionally.
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
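
The bug-936 comment distinguishes a true fused multiply-add (one rounding) from MLA (two roundings). One portable way to observe the difference in scalar code, assuming std::fma maps to a hardware FMA or a correctly rounded software fallback:

#include <cmath>
#include <cstdio>

int main() {
  double a = 1.0 / 3.0, b = 10.0;
  double unfused = a * b - a * b;            // product rounded, then cancels: 0
  double fused   = std::fma(a, b, -(a * b)); // exact rounding error of a*b
  std::printf("unfused: %g\n", unfused);     // 0
  std::printf("fused:   %g\n", fused);       // tiny nonzero residual
  return 0;
}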
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
@@ -472,6 +500,193 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
}
+//---------- double ----------
+#if EIGEN_ARCH_ARM64
+
+#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
+// Bug 907: workaround missing declarations of the following two functions in the Android NDK
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_f64 (float64x2_t __a)
+{
+ return (uint64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_f64_u64 (uint64x2_t __a)
+{
+ return (float64x2_t) __a;
+}
+#endif
+
+typedef float64x2_t Packet2d;
+typedef float64x1_t Packet1d;
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet1d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket=1,
+
+ HasDiv = 1,
+ // FIXME check the Has*
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 0,
+ HasSqrt = 0
+ };
+};
+
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a)
+{
+ Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1);
+ return vaddq_f64(pset1<Packet2d>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
+
+#ifdef __ARM_FEATURE_FMA
+// See bug 936. See above comment about FMA for float.
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
+
+// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ return vld1q_dup_f64(from);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride)
+{
+ Packet2d res;
+ res = vsetq_lane_f64(from[0*stride], res, 0);
+ res = vsetq_lane_f64(from[1*stride], res, 1);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride)
+{
+ to[stride*0] = vgetq_lane_f64(from, 0);
+ to[stride*1] = vgetq_lane_f64(from, 1);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
+
+// FIXME only store the first 2 elements?
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
+
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+// workaround ICE, see bug 907
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ float64x2_t trn1, trn2;
+
+ // NEON zip performs interleaving of the supplied vectors.
+ // We perform two interleaves in a row to acquire the transposed vector
+ trn1 = vzip1q_f64(vecs[0], vecs[1]);
+ trn2 = vzip2q_f64(vecs[0], vecs[1]);
+
+ // Do the addition of the resulting vectors
+ return vaddq_f64(trn1, trn2);
+}
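
A worked example of the zip-based 2x2 transpose used by preduxp (our annotation):

    // vecs[0] = [a0, a1], vecs[1] = [b0, b1]
    // vzip1q_f64 -> trn1 = [a0, b0]   (low lanes interleaved)
    // vzip2q_f64 -> trn2 = [a1, b1]   (high lanes interleaved)
    // vaddq_f64  -> [a0+a1, b0+b1]    i.e. lane i holds the horizontal sum of vecs[i]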
+// Other reduction functions:
+// mul
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
+#endif
+
+// min
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
+
+// this PALIGN_NEON business works around a bug in LLVM Clang 3.0 that causes spurious compilation errors,
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+
+PALIGN_NEON(0,Packet2d,vextq_f64)
+PALIGN_NEON(1,Packet2d,vextq_f64)
+#undef PALIGN_NEON
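
vextq_f64(first, second, n) concatenates the two registers and extracts two consecutive lanes starting at lane n of the first; for example (our annotation):

    // first = [f0, f1], second = [s0, s1]
    // vextq_f64(first, second, 1) -> [f1, s0]
    // (Offset 0 would be a no-op, which is why run() skips the call.)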
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
+ float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
+
+ kernel.packet[0] = trn1;
+ kernel.packet[1] = trn2;
+}
+#endif // EIGEN_ARCH_ARM64
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 8f78b3a6c..9ffba5b41 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
Packet4i emm0;
- Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
+ Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
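
The cmplt-to-cmpnge switch changes how NaN is masked: comparisons against NaN are unordered, so x < 0 is false for NaN while !(x >= 0) is true, which makes plog flag NaN inputs as invalid. Scalar sketch (name ours):

    // old mask: (x <  0.0f)  -> false for NaN, so NaN was not flagged
    // new mask: !(x >= 0.0f) -> true  for NaN (unordered compare), so NaN is flagged
    inline bool log_arg_invalid(float x) { return !(x >= 0.0f); }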
@@ -167,7 +167,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
- return pmul(y, Packet4f(_mm_castsi128_ps(emm0)));
+ return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d pexp<Packet2d>(const Packet2d& _x)
@@ -241,7 +241,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
emm0 = _mm_add_epi32(emm0, p4i_1023_0);
emm0 = _mm_slli_epi32(emm0, 20);
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
- return pmul(x, Packet2d(_mm_castsi128_pd(emm0)));
+ return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
}
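
Our reading of the two pexp changes: SSE maxps returns its second source operand when the compare is unordered, so taking pmax of the scaled result with the original input _x lets a NaN input propagate to the output instead of yielding an arbitrary finite value:

    // pmax(y * 2^n, _x) == _x == NaN   when _x is NaN  (illustrative)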
/* evaluation of 4 sines at once, using SSE2 intrinsics.
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 6923c88ec..3befd4c25 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -22,13 +22,13 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
-#ifdef EIGEN_VECTORIZE_FMA
-#ifndef EIGEN_HAS_FUSED_MADD
-#define EIGEN_HAS_FUSED_MADD 1
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
#endif
#endif
-#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
+#if defined EIGEN_VECTORIZE_AVX && EIGEN_COMP_GNUC_STRICT
// With GCC's default ABI version, __m128 and __m256 are the same type, and therefore we cannot
// have overloads for both types without a linking error.
// One solution is to increase the ABI version using -fabi-version=4 (or greater).
@@ -147,7 +147,7 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
-#if defined(_MSC_VER) && (_MSC_VER==1500)
+#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
@@ -165,7 +165,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { re
// Using inline assembly is also not an option because then gcc fails to properly reorder the instructions.
// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
// Also note that with AVX, we want it to generate a vbroadcastss.
-#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
}
@@ -282,10 +282,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { E
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
-#if defined(_MSC_VER)
+#if EIGEN_COMP_MSVC
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
EIGEN_DEBUG_UNALIGNED_LOAD
- #if (_MSC_VER==1600)
+ #if (EIGEN_COMP_MSVC==1600)
// NOTE Some versions of MSVC10 generate bad code when using _mm_loadu_ps
// (i.e., they do not generate an unaligned load!!)
// TODO On most architectures this version should also be faster than a single _mm_loadu_ps
@@ -307,11 +307,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
// TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes!
-#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8)))
+#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8)))
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
-#elif defined(__clang__)
+#elif EIGEN_COMP_CLANG
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
#define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
@@ -439,13 +439,13 @@ template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_p
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
#endif
-#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER)
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
// Direct access to the struct members fixed bug #62.
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
-#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#elif EIGEN_COMP_MSVC_STRICT
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
@@ -680,7 +680,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
#endif // EIGEN_VECTORIZE_SSE4_1
}
-#if (defined __GNUC__)
+#if EIGEN_COMP_GNUC
// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
// {
// Packet4f res = b;
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index ae264aa64..161b0aa93 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -31,7 +31,7 @@ template<typename Scalar>
struct functor_traits<assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost,
- PacketAccess = packet_traits<Scalar>::IsVectorized
+ PacketAccess = packet_traits<Scalar>::Vectorizable
};
};
@@ -73,7 +73,7 @@ template<typename Scalar>
struct functor_traits<sub_assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
- PacketAccess = packet_traits<Scalar>::HasAdd
+ PacketAccess = packet_traits<Scalar>::HasSub
};
};
@@ -81,22 +81,24 @@ struct functor_traits<sub_assign_op<Scalar> > {
* \brief Template functor for scalar/packet assignment with multiplication
*
*/
-template<typename Scalar> struct mul_assign_op {
+template<typename DstScalar, typename SrcScalar=DstScalar>
+struct mul_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
template<int Alignment, typename Packet>
- EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
- { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
};
-template<typename Scalar>
-struct functor_traits<mul_assign_op<Scalar> > {
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
enum {
- Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
- PacketAccess = packet_traits<Scalar>::HasMul
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
};
};
+template<typename DstScalar,typename SrcScalar> struct functor_is_product_like<mul_assign_op<DstScalar,SrcScalar> > { enum { ret = 1 }; };
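
With the widened template, in-place multiplication can mix scalar types (vectorization is then disabled via the is_same check). A hypothetical direct use of the internal functor, purely to illustrate the new signature:

    #include <Eigen/Core>
    void mixed_mul_assign() {
      double d = 2.0;
      float  f = 1.5f;
      Eigen::internal::mul_assign_op<double, float>().assignCoeff(d, f);  // d *= f -> 3.0
    }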
/** \internal
 * \brief Template functor for scalar/packet assignment with division
@@ -115,13 +117,13 @@ template<typename Scalar>
struct functor_traits<div_assign_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
- PacketAccess = packet_traits<Scalar>::HasMul
+ PacketAccess = packet_traits<Scalar>::HasDiv
};
};
/** \internal
- * \brief Template functor for scalar/packet assignment with swaping
+ * \brief Template functor for scalar/packet assignment with swapping
*
 * It works as follows. For a non-vectorized evaluation loop, we have:
* for(i) func(A.coeffRef(i), B.coeff(i));
@@ -140,8 +142,13 @@ template<typename Scalar> struct swap_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
{
+#ifdef __CUDACC__
+ // FIXME is there some kind of cuda::swap?
+ Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
+#else
using std::swap;
swap(a,const_cast<Scalar&>(b));
+#endif
}
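
std::swap carries no device annotation, hence the manual three-step rotation under __CUDACC__. Its scalar shape, as a plain sketch (in the functor it would additionally be EIGEN_DEVICE_FUNC; name ours):

    template<typename Scalar>
    inline void manual_swap(Scalar& a, Scalar& b) {
      Scalar t = b;  // copy b aside
      b = a;         // overwrite b with a
      a = t;         // finish the rotation
    }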
template<int LhsAlignment, int RhsAlignment, typename Packet>
@@ -156,7 +163,7 @@ template<typename Scalar>
struct functor_traits<swap_assign_op<Scalar> > {
enum {
Cost = 3 * NumTraits<Scalar>::ReadCost,
- PacketAccess = packet_traits<Scalar>::IsVectorized
+ PacketAccess = packet_traits<Scalar>::Vectorizable
};
};
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 157d075a7..9c96181c7 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -115,7 +115,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
*/
template<typename Scalar> struct scalar_min_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); }
@@ -138,7 +138,7 @@ struct functor_traits<scalar_min_op<Scalar> > {
*/
template<typename Scalar> struct scalar_max_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); }
template<typename Packet>
EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); }
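
numext::mini/maxi are Eigen's device-friendly stand-ins for std::min/max: being ordinary function templates, they can carry device annotations, unlike a blanket using std::min. The idea, roughly (not Eigen's exact definition):

    template<typename T> inline T mini_sketch(const T& a, const T& b) { return b < a ? b : a; }
    template<typename T> inline T maxi_sketch(const T& a, const T& b) { return a < b ? b : a; }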
@@ -164,8 +164,6 @@ template<typename Scalar> struct scalar_hypot_op {
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
- EIGEN_USING_STD_MATH(max);
- EIGEN_USING_STD_MATH(min);
using std::sqrt;
Scalar p, qp;
if(_x>_y)
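
The hunk is truncated here, but the surrounding body implements the classic overflow-safe formulation hypot(x,y) = p*sqrt(1+(q/p)^2) with p = max(|x|,|y|) and q = min(|x|,|y|); the removal above merely drops the now-unused min/max imports. The standard technique, sketched (not the exact Eigen body):

    #include <cmath>
    double hypot_sketch(double x, double y) {
      double ax = std::abs(x), ay = std::abs(y);
      double p  = ax > ay ? ax : ay;   // larger magnitude
      double q  = ax > ay ? ay : ax;   // smaller magnitude
      if (p == 0.0) return 0.0;        // both zero: avoid 0/0
      double qp = q / p;               // in [0,1], cannot overflow
      return p * std::sqrt(1.0 + qp * qp);
    }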
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
deleted file mode 100644
index 637513132..000000000
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ /dev/null
@@ -1,452 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_COEFFBASED_PRODUCT_H
-#define EIGEN_COEFFBASED_PRODUCT_H
-
-namespace Eigen {
-
-namespace internal {
-
-/*********************************************************************************
-* Coefficient based product implementation.
-* It is designed for the following use cases:
-* - small fixed sizes
-* - lazy products
-*********************************************************************************/
-
-/* Since the all the dimensions of the product are small, here we can rely
- * on the generic Assign mechanism to evaluate the product per coeff (or packet).
- *
- * Note that here the inner-loops should always be unrolled.
- */
-
-template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl;
-
-template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl;
-
-template<typename LhsNested, typename RhsNested, int NestingFlags>
-struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
-{
- typedef MatrixXpr XprKind;
- typedef typename remove_all<LhsNested>::type _LhsNested;
- typedef typename remove_all<RhsNested>::type _RhsNested;
- typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
- typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
- typename traits<_RhsNested>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<_LhsNested>::Index,
- typename traits<_RhsNested>::Index>::type Index;
-
- enum {
- LhsCoeffReadCost = _LhsNested::CoeffReadCost,
- RhsCoeffReadCost = _RhsNested::CoeffReadCost,
- LhsFlags = _LhsNested::Flags,
- RhsFlags = _RhsNested::Flags,
-
- RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
- ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
- InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
-
- MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
-
- LhsRowMajor = LhsFlags & RowMajorBit,
- RhsRowMajor = RhsFlags & RowMajorBit,
-
- SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
-
- CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
- && (ColsAtCompileTime == Dynamic
- || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
- && (RhsFlags&AlignedBit)
- )
- ),
-
- CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
- && (RowsAtCompileTime == Dynamic
- || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
- && (LhsFlags&AlignedBit)
- )
- ),
-
- EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
- : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
- : (RhsRowMajor && !CanVectorizeLhs),
-
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
- | (EvalToRowMajor ? RowMajorBit : 0)
- | NestingFlags
- | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
- | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
- // TODO enable vectorization for mixed types
- | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
-
- CoeffReadCost = InnerSize == Dynamic ? Dynamic
- : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
- + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
-
- /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
- * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
- * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
- * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
- */
- CanVectorizeInner = SameType
- && LhsRowMajor
- && (!RhsRowMajor)
- && (LhsFlags & RhsFlags & ActualPacketAccessBit)
- && (LhsFlags & RhsFlags & AlignedBit)
- && (InnerSize % packet_traits<Scalar>::size == 0)
- };
-};
-
-} // end namespace internal
-
-template<typename LhsNested, typename RhsNested, int NestingFlags>
-class CoeffBasedProduct
- : internal::no_assignment_operator,
- public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
-{
- public:
-
- typedef MatrixBase<CoeffBasedProduct> Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
- typedef typename Base::PlainObject PlainObject;
-
- private:
-
- typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
- typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
-
- enum {
- PacketSize = internal::packet_traits<Scalar>::size,
- InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
- Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
- CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
- };
-
- typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
- Unroll ? InnerSize-1 : Dynamic,
- _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
-
- typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
-
- public:
-
- EIGEN_DEVICE_FUNC
- inline CoeffBasedProduct(const CoeffBasedProduct& other)
- : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
- {}
-
- template<typename Lhs, typename Rhs>
- EIGEN_DEVICE_FUNC
- inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
- : m_lhs(lhs), m_rhs(rhs)
- {
- // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
- // We still allow to mix T and complex<T>.
- EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
- eigen_assert(lhs.cols() == rhs.rows()
- && "invalid matrix product"
- && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
- {
- Scalar res;
- ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
- return res;
- }
-
- /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
- * which is why we don't set the LinearAccessBit.
- */
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
- {
- Scalar res;
- const Index row = RowsAtCompileTime == 1 ? 0 : index;
- const Index col = RowsAtCompileTime == 1 ? index : 0;
- ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
- return res;
- }
-
- template<int LoadMode>
- EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
- {
- PacketScalar res;
- internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
- Unroll ? InnerSize-1 : Dynamic,
- _LhsNested, _RhsNested, PacketScalar, LoadMode>
- ::run(row, col, m_lhs, m_rhs, res);
- return res;
- }
-
- // Implicit conversion to the nested type (trigger the evaluation of the product)
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE operator const PlainObject& () const
- {
- m_result.lazyAssign(*this);
- return m_result;
- }
-
- EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
- EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
-
- EIGEN_DEVICE_FUNC
- const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
- { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
-
- template<int DiagonalIndex>
- EIGEN_DEVICE_FUNC
- const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
- { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
-
- EIGEN_DEVICE_FUNC
- const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
- { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
-
- protected:
- typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
- typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
-
- mutable PlainObject m_result;
-};
-
-namespace internal {
-
-// here we need to overload the nested rule for products
-// such that the nested type is a const reference to a plain matrix
-template<typename Lhs, typename Rhs, int N, typename PlainObject>
-struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
-{
- typedef PlainObject const& type;
-};
-
-/***************************************************************************
-* Normal product .coeff() implementation (with meta-unrolling)
-***************************************************************************/
-
-/**************************************
-*** Scalar path - no vectorization ***
-**************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
-{
- typedef typename Lhs::Index Index;
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
- {
- product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
- res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
- }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
-{
- typedef typename Lhs::Index Index;
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
- {
- res = lhs.coeff(row, 0) * rhs.coeff(0, col);
- }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
-{
- typedef typename Lhs::Index Index;
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
- {
- eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
- res = lhs.coeff(row, 0) * rhs.coeff(0, col);
- for(Index i = 1; i < lhs.cols(); ++i)
- res += lhs.coeff(row, i) * rhs.coeff(i, col);
- }
-};
-
-/*******************************************
-*** Scalar path with inner vectorization ***
-*******************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
-struct product_coeff_vectorized_unroller
-{
- typedef typename Lhs::Index Index;
- enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
- {
- product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
- pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
- }
-};
-
-template<typename Lhs, typename Rhs, typename Packet>
-struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
- {
- pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
- }
-};
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
-{
- typedef typename Lhs::PacketScalar Packet;
- typedef typename Lhs::Index Index;
- enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
- {
- Packet pres;
- product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
- res = predux(pres);
- }
-};
-
-template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
-struct product_coeff_vectorized_dyn_selector
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
- {
- res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
- }
-};
-
-// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
-// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
-template<typename Lhs, typename Rhs, int RhsCols>
-struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
- {
- res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
- }
-};
-
-template<typename Lhs, typename Rhs, int LhsRows>
-struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
- {
- res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
- }
-};
-
-template<typename Lhs, typename Rhs>
-struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
- {
- res = lhs.transpose().cwiseProduct(rhs).sum();
- }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
- {
- product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
- }
-};
-
-/*******************
-*** Packet path ***
-*******************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
- {
- product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
- res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
- }
-};
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
- {
- product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
- res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
- }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
- {
- res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
- }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
- {
- res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
- }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
- {
- eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
- res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
- for(Index i = 1; i < lhs.cols(); ++i)
- res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
- }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
-{
- typedef typename Lhs::Index Index;
- static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
- {
- eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
- res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
- for(Index i = 1; i < lhs.cols(); ++i)
- res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
- }
-};
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_COEFFBASED_PRODUCT_H
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index b91786037..11e5f591d 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -158,8 +158,8 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, i
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
}
-#ifdef EIGEN_HAS_FUSE_CJMADD
- #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+ #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
#else
// FIXME (a bit overkill maybe ?)
@@ -184,8 +184,8 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, i
gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
}
- #define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
-// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
+ #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
+// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
#endif
/* Vectorization logic
@@ -220,7 +220,7 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
-#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC)
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@@ -286,7 +286,7 @@ public:
// let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register.
-#ifdef EIGEN_HAS_FUSED_MADD
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd(a,b,c);
#else
@@ -328,7 +328,7 @@ public:
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4,
-#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC)
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@@ -391,7 +391,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
-#ifdef EIGEN_HAS_FUSED_MADD
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a.v,b,c.v);
#else
@@ -675,7 +675,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
-#ifdef EIGEN_HAS_FUSED_MADD
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a,b.v,c.v);
#else
@@ -801,31 +801,36 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
- EIGEN_ASM_COMMENT("begin gegp micro kernel 3p x 4");
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
RhsPacket B_0, T0;
LhsPacket A2;
#define EIGEN_GEBGP_ONESTEP(K) \
- internal::prefetch(blA+(3*K+16)*LhsProgress); \
- traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
- traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
- traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
- traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \
- traits.madd(A0, B_0, C0, T0); \
- traits.madd(A1, B_0, C4, T0); \
- traits.madd(A2, B_0, C8, B_0); \
- traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \
- traits.madd(A0, B_0, C1, T0); \
- traits.madd(A1, B_0, C5, T0); \
- traits.madd(A2, B_0, C9, B_0); \
- traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \
- traits.madd(A0, B_0, C2, T0); \
- traits.madd(A1, B_0, C6, T0); \
- traits.madd(A2, B_0, C10, B_0); \
- traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \
- traits.madd(A0, B_0, C3 , T0); \
- traits.madd(A1, B_0, C7, T0); \
- traits.madd(A2, B_0, C11, B_0)
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ internal::prefetch(blA+(3*K+16)*LhsProgress); \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, T0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C1, T0); \
+ traits.madd(A1, B_0, C5, T0); \
+ traits.madd(A2, B_0, C9, B_0); \
+ traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C2, T0); \
+ traits.madd(A1, B_0, C6, T0); \
+ traits.madd(A2, B_0, C10, B_0); \
+ traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C3 , T0); \
+ traits.madd(A1, B_0, C7, T0); \
+ traits.madd(A2, B_0, C11, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
+ } while(false)
internal::prefetch(blB+(48+0));
EIGEN_GEBGP_ONESTEP(0);
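
Wrapping each multi-statement macro in do { ... } while(false) makes it expand to a single statement (safe after an unbraced if), and the bracketing asm comments are the bug 935 workaround named inside the macro. Generic shape of the idiom (step_one/step_two are placeholders):

    #define MULTI_STEP(x)  \
      do {                 \
        step_one(x);       \
        step_two(x);       \
      } while(false)

    // Expands to one statement, so unbraced control flow stays correct:
    //   if (ready) MULTI_STEP(v); else bail_out();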
@@ -840,6 +845,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*4*RhsProgress;
blA += pk*3*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4");
}
// process remaining peeled loop
for(Index k=peeled_kc; k<depth; k++)
@@ -918,16 +925,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
- EIGEN_ASM_COMMENT("begin gegp micro kernel 3p x 1");
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1");
RhsPacket B_0;
#define EIGEN_GEBGP_ONESTEP(K) \
- traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
- traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
- traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
- traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
- traits.madd(A0, B_0, C0, B_0); \
- traits.madd(A1, B_0, C4, B_0); \
- traits.madd(A2, B_0, C8, B_0)
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
+ } while(false)
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBGP_ONESTEP(1);
@@ -940,6 +952,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*RhsProgress;
blA += pk*3*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
}
// process remaining peeled loop
@@ -1005,22 +1019,27 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
- EIGEN_ASM_COMMENT("begin gegp micro kernel 2pX4");
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
RhsPacket B_0, B1, B2, B3, T0;
#define EIGEN_GEBGP_ONESTEP(K) \
- traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
- traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
- traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
- traits.madd(A0, B_0, C0, T0); \
- traits.madd(A1, B_0, C4, B_0); \
- traits.madd(A0, B1, C1, T0); \
- traits.madd(A1, B1, C5, B1); \
- traits.madd(A0, B2, C2, T0); \
- traits.madd(A1, B2, C6, B2); \
- traits.madd(A0, B3, C3, T0); \
- traits.madd(A1, B3, C7, B3)
-
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A0, B1, C1, T0); \
+ traits.madd(A1, B1, C5, B1); \
+ traits.madd(A0, B2, C2, T0); \
+ traits.madd(A1, B2, C6, B2); \
+ traits.madd(A0, B3, C3, T0); \
+ traits.madd(A1, B3, C7, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
+ } while(false)
+
internal::prefetch(blB+(48+0));
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBGP_ONESTEP(1);
@@ -1034,6 +1053,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*4*RhsProgress;
blA += pk*(2*Traits::LhsProgress);
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4");
}
// process remaining peeled loop
for(Index k=peeled_kc; k<depth; k++)
@@ -1096,15 +1117,20 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
- EIGEN_ASM_COMMENT("begin gegp micro kernel 2p x 1");
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
RhsPacket B_0, B1;
#define EIGEN_GEBGP_ONESTEP(K) \
- traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
- traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
- traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
- traits.madd(A0, B_0, C0, B1); \
- traits.madd(A1, B_0, C4, B_0)
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B1); \
+ traits.madd(A1, B_0, C4, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
+ } while(false)
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBGP_ONESTEP(1);
@@ -1117,6 +1143,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*RhsProgress;
blA += pk*2*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
}
// process remaining peeled loop
@@ -1179,16 +1207,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
- EIGEN_ASM_COMMENT("begin gegp micro kernel 1pX4");
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4");
RhsPacket B_0, B1, B2, B3;
#define EIGEN_GEBGP_ONESTEP(K) \
- traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
- traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
- traits.madd(A0, B_0, C0, B_0); \
- traits.madd(A0, B1, C1, B1); \
- traits.madd(A0, B2, C2, B2); \
- traits.madd(A0, B3, C3, B3);
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A0, B1, C1, B1); \
+ traits.madd(A0, B2, C2, B2); \
+ traits.madd(A0, B3, C3, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \
+ } while(false)
internal::prefetch(blB+(48+0));
EIGEN_GEBGP_ONESTEP(0);
@@ -1203,6 +1236,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*4*RhsProgress;
blA += pk*1*LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4");
}
// process remaining peeled loop
for(Index k=peeled_kc; k<depth; k++)
@@ -1251,14 +1286,19 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
- EIGEN_ASM_COMMENT("begin gegp micro kernel 2p x 1");
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
RhsPacket B_0;
#define EIGEN_GEBGP_ONESTEP(K) \
- traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
- traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
- traits.madd(A0, B_0, C0, B_0); \
-
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
+ } while(false);
+
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBGP_ONESTEP(1);
EIGEN_GEBGP_ONESTEP(2);
@@ -1270,6 +1310,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*RhsProgress;
blA += pk*1*Traits::LhsProgress;
+
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
}
// process remaining peeled loop
@@ -1402,14 +1444,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
B_0 = blB[0];
B_1 = blB[1];
- MADD(cj,A0,B_0,C0, B_0);
- MADD(cj,A0,B_1,C1, B_1);
-
+ CJMADD(cj,A0,B_0,C0, B_0);
+ CJMADD(cj,A0,B_1,C1, B_1);
+
B_0 = blB[2];
B_1 = blB[3];
- MADD(cj,A0,B_0,C2, B_0);
- MADD(cj,A0,B_1,C3, B_1);
-
+ CJMADD(cj,A0,B_0,C2, B_0);
+ CJMADD(cj,A0,B_1,C3, B_1);
+
blB += 4;
}
res(i, j2 + 0) += alpha * C0;
@@ -1434,7 +1476,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
{
LhsScalar A0 = blA[k];
RhsScalar B_0 = blB[k];
- MADD(cj, A0, B_0, C0, B_0);
+ CJMADD(cj, A0, B_0, C0, B_0);
}
res(i, j2) += alpha * C0;
}
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 49362adbe..fd9443cd2 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -228,8 +228,8 @@ struct gemm_functor
cols = m_rhs.cols();
Gemm::run(rows, cols, m_lhs.cols(),
- /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
- /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
+ &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
+ &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
@@ -379,84 +379,92 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
} // end namespace internal
+namespace internal {
+
template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemmProduct>
- : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
{
- enum {
- MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
- };
- public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
- typedef typename Lhs::Scalar LhsScalar;
- typedef typename Rhs::Scalar RhsScalar;
- typedef Scalar ResScalar;
-
- GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
- {
- typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
- EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
- }
-
- template<typename Dest>
- inline void evalTo(Dest& dst) const
- {
- if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
- dst.noalias() = m_lhs .lazyProduct( m_rhs );
- else
- {
- dst.setZero();
- scaleAndAddTo(dst,Scalar(1));
- }
- }
-
- template<typename Dest>
- inline void addTo(Dest& dst) const
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ typedef typename Product<Lhs,Rhs>::Index Index;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+ enum {
+ MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
+ };
+
+ typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
+
+ template<typename Dst>
+ static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ lazyproduct::evalTo(dst, lhs, rhs);
+ else
{
- if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
- dst.noalias() += m_lhs .lazyProduct( m_rhs );
- else
- scaleAndAddTo(dst,Scalar(1));
+ dst.setZero();
+ scaleAndAddTo(dst, lhs, rhs, Scalar(1));
}
+ }
- template<typename Dest>
- inline void subTo(Dest& dst) const
- {
- if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
- dst.noalias() -= m_lhs .lazyProduct( m_rhs );
- else
- scaleAndAddTo(dst,Scalar(-1));
- }
-
- template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
- {
- eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ template<typename Dst>
+ static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ lazyproduct::addTo(dst, lhs, rhs);
+ else
+ scaleAndAddTo(dst,lhs, rhs, Scalar(1));
+ }
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+ template<typename Dst>
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ lazyproduct::subTo(dst, lhs, rhs);
+ else
+ scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
+ }
+
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
+ {
+ eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
- * RhsBlasTraits::extractScalarFactor(m_rhs);
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
- typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
- Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+ * RhsBlasTraits::extractScalarFactor(a_rhs);
- typedef internal::gemm_functor<
- Scalar, Index,
- internal::general_matrix_matrix_product<
- Index,
- LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
- RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
- (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
- _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+ Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
- BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
+ typedef internal::gemm_functor<
+ Scalar, Index,
+ internal::general_matrix_matrix_product<
+ Index,
+ LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
+ RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
+ (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
+ ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
- internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
- }
+ BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
+ internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
+ (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
+ }
};
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_GENERAL_MATRIX_MATRIX_H
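
After this refactoring the product expression no longer evaluates itself through ProductBase; assignment dispatches to the static methods of generic_product_impl. Roughly, from the user side (illustrative):

    #include <Eigen/Dense>
    void gemm_paths(Eigen::MatrixXd& C, const Eigen::MatrixXd& A, const Eigen::MatrixXd& B) {
      C.noalias()  = A * B;  // evalTo: setZero() then scaleAndAddTo(+1)
      C.noalias() += A * B;  // addTo:  scaleAndAddTo(+1)
      C.noalias() -= A * B;  // subTo:  scaleAndAddTo(-1)
      // For very small operands the lazyproduct branch is taken instead.
    }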
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 8de39f76f..e55994900 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -20,7 +20,7 @@ namespace internal {
/**********************************************************************
* This file implements a general A * B product while
* evaluating only one triangular part of the product.
-* This is more general version of self adjoint product (C += A A^T)
+* This is a more general version of the self-adjoint product (C += A A^T)
* as done by the level 3 BLAS SYRK routine.
**********************************************************************/
@@ -270,14 +270,14 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
};
template<typename MatrixType, unsigned int UpLo>
-template<typename ProductDerived, typename _Lhs, typename _Rhs>
-TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
+template<typename ProductType>
+TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha)
{
- eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());
-
- general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
+ eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
+
+ general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha);
- return *this;
+ return derived();
}
} // end namespace Eigen
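
_assignProduct is what backs expressions that assign a product directly to a triangular view, evaluating only one triangular half (the SYRK-like case described at the top of the file). User-level shape, assuming the usual TriangularView API (illustrative):

    #include <Eigen/Dense>
    void syrk_like(Eigen::MatrixXd& C, const Eigen::MatrixXd& A) {
      // Only the lower triangular half of A * A.transpose() is computed and stored:
      C.triangularView<Eigen::Lower>() = A * A.transpose();
    }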
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index 837e69415..2b90abf8f 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -129,7 +129,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
Index blockRows = (rows / threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
- GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
+ ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
#pragma omp parallel num_threads(threads)
{
@@ -146,8 +146,6 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if(transpose) func(c0, actualBlockCols, 0, rows, info);
else func(0, rows, c0, actualBlockCols, info);
}
-
- delete[] info;
#endif
}
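
ei_declare_aligned_stack_constructed_variable replaces the new[]/delete[] pair with Eigen's stack-first allocation helper, constructing the objects in place (alloca for small sizes, heap fallback otherwise). A much-simplified sketch of the idiom, not the real macro:

    #include <new>       // placement new
    #include <alloca.h>  // POSIX; MSVC has _alloca in <malloc.h>
    struct Info { int sync; int users; };  // stand-in for GemmParallelInfo<Index>
    void run_with_infos(int threads) {
      Info* info = static_cast<Info*>(alloca(sizeof(Info) * threads)); // caller-frame stack
      for (int i = 0; i < threads; ++i) new (info + i) Info();         // construct in place
      // ... use info[0..threads-1]; trivially destructible, nothing to tear down
    }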
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 21f8175d2..e831ee20f 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -469,55 +469,54 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
***************************************************************************/
namespace internal {
+
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
-struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
- : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
-{};
-}
-
-template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
-struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
- : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
+struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
- EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
-
- SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ typedef typename Product<Lhs,Rhs>::Index Index;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+
enum {
LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
-
- template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+
+ template<typename Dest>
+ static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
- eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
- * RhsBlasTraits::extractScalarFactor(m_rhs);
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+ * RhsBlasTraits::extractScalarFactor(a_rhs);
internal::product_selfadjoint_matrix<Scalar, Index,
- EIGEN_LOGICAL_XOR(LhsIsUpper,
- internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
+ EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
- EIGEN_LOGICAL_XOR(RhsIsUpper,
- internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
+ EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
::run(
- lhs.rows(), rhs.cols(), // sizes
- &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
- &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
- &dst.coeffRef(0,0), dst.outerStride(), // result info
- actualAlpha // alpha
+ lhs.rows(), rhs.cols(), // sizes
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
+ &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
+ &dst.coeffRef(0,0), dst.outerStride(), // result info
+ actualAlpha // alpha
);
}
};
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
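
The net effect of this hunk: the SelfadjointProductMatrix expression class and its ProductBase plumbing are gone, and the BLAS-level kernel is now reached through a plain static entry point, internal::selfadjoint_product_impl<...>::run(dst, lhs, rhs, alpha), which the new Product evaluators can invoke directly. User code is unaffected; an expression like the following still ends up in that kernel:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd A = MatrixXd::Random(4,4);  // only its lower triangle is referenced below
      MatrixXd B = MatrixXd::Random(4,4);
      MatrixXd C = MatrixXd::Zero(4,4);

      // Selfadjoint * dense product: evaluation funnels into
      // internal::selfadjoint_product_impl<...>::run(C, lhs, rhs, alpha).
      C.noalias() += 2.0 * (A.selfadjointView<Lower>() * B);
      return 0;
    }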
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index 26e787949..372a44e47 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -169,45 +169,45 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
***************************************************************************/
namespace internal {
-template<typename Lhs, int LhsMode, typename Rhs>
-struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
- : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
-{};
-}
template<typename Lhs, int LhsMode, typename Rhs>
-struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
- : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
+struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
{
- EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
-
- enum {
- LhsUpLo = LhsMode&(Upper|Lower)
- };
-
- SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ typedef typename Product<Lhs,Rhs>::Index Index;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+ enum { LhsUpLo = LhsMode&(Upper|Lower) };
+
+ template<typename Dest>
+ static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
typedef typename Dest::Scalar ResScalar;
- typedef typename Base::RhsScalar RhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
- eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
+ eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
- * RhsBlasTraits::extractScalarFactor(m_rhs);
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+ * RhsBlasTraits::extractScalarFactor(a_rhs);
enum {
EvalToDest = (Dest::InnerStrideAtCompileTime==1),
- UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
+ UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1)
};
internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
- internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
+ internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
EvalToDest ? dest.data() : static_dest.data());
@@ -230,11 +230,12 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
Index size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
- Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
+ Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
}
- internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
+ internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
(
lhs.rows(), // size
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
@@ -248,34 +249,24 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
}
};
-namespace internal {
-template<typename Lhs, typename Rhs, int RhsMode>
-struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
- : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
-{};
-}
-
template<typename Lhs, typename Rhs, int RhsMode>
-struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
- : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
+struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false>
{
- EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
-
- enum {
- RhsUpLo = RhsMode&(Upper|Lower)
- };
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ enum { RhsUpLo = RhsMode&(Upper|Lower) };
- SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ template<typename Dest>
+ static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
- SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
- Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
+ selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
+ Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);
}
};
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
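
The second specialization above handles a selfadjoint factor on the right by reducing it to the vector-on-the-right kernel: dst = lhs * sym is computed as dst^T = sym^T * lhs^T, and since transposing a selfadjoint matrix stored in its Upper triangle yields one stored in its Lower triangle, the UpLo flag is swapped along the way. The identity is easy to check at the user level:

    #include <Eigen/Dense>
    #include <cassert>
    using namespace Eigen;

    int main()
    {
      RowVectorXd v = RowVectorXd::Random(3);
      MatrixXd S = MatrixXd::Random(3,3);

      MatrixXd Sfull = S.selfadjointView<Upper>();  // materialized symmetric matrix
      RowVectorXd d1 = v * S.selfadjointView<Upper>();                   // right-selfadjoint path
      RowVectorXd d2 = (Sfull.transpose() * v.transpose()).transpose();  // transposed kernel

      assert((d1 - d2).norm() < 1e-12);
      return 0;
    }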
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 8594a97ce..2ae364111 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -79,11 +79,11 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
if (IsRowMajor)
actualAlpha = numext::conj(actualAlpha);
- internal::selfadjoint_rank2_update_selector<Scalar, Index,
- typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type,
- typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type,
+ typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
+ typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
+ internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
(IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
- ::run(_expression().const_cast_derived().data(),_expression().outerStride(),actualU,actualV,actualAlpha);
+ ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);
return *this;
}
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 4cbb79da0..60c99dcd2 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -380,28 +380,29 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
* Wrapper to product_triangular_matrix_matrix
***************************************************************************/
-template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
- : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
-{};
-
} // end namespace internal
+namespace internal {
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
- : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
+struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{
- EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
-
- TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
{
- typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
- typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+ typedef typename Dest::Index Index;
+ typedef typename Dest::Scalar Scalar;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
- * RhsBlasTraits::extractScalarFactor(m_rhs);
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+ * RhsBlasTraits::extractScalarFactor(a_rhs);
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@@ -416,19 +417,21 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
internal::product_triangular_matrix_matrix<Scalar, Index,
Mode, LhsIsTriangular,
- (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(
stripedRows, stripedCols, stripedDepth, // sizes
- &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
- &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
+ &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
+ &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha, blocking
);
}
};
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
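
Same refactoring pattern as the selfadjoint case: the TriangularProduct expression class is folded into a static-run helper, internal::triangular_product_impl, and the blas_traits typedef boilerplate moves inside run() where it is actually used. A user-level expression exercising this kernel:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd A = MatrixXd::Random(4,4);
      MatrixXd B = MatrixXd::Random(4,2);

      // Triangular * dense: under the new design this evaluates through
      // internal::triangular_product_impl<Mode,true,Lhs,false,Rhs,false>::run(C, A_view, B, 1).
      MatrixXd C = A.triangularView<Upper>() * B;
      return C.size() ? 0 : 1;
    }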
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index d33e3f409..4d88a710b 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -163,83 +163,67 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,Con
* Wrapper to product_triangular_vector
***************************************************************************/
-template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
- : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
-{};
-
-template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
- : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
-{};
-
-
-template<int StorageOrder>
+template<int Mode,int StorageOrder>
struct trmv_selector;
} // end namespace internal
+namespace internal {
+
template<int Mode, typename Lhs, typename Rhs>
-struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
- : public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs >
+struct triangular_product_impl<Mode,true,Lhs,false,Rhs,true>
{
- EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
-
- TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
{
- eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
-
- internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha);
+ eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
+
+ internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha);
}
};
template<int Mode, typename Lhs, typename Rhs>
-struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
- : public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs >
+struct triangular_product_impl<Mode,false,Lhs,true,Rhs,false>
{
- EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
-
- TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+ template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
{
- eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+ eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
- typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
Transpose<Dest> dstT(dst);
- internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
- TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
+ internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),
+ (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>
+ ::run(rhs.transpose(),lhs.transpose(), dstT, alpha);
}
};
+} // end namespace internal
+
namespace internal {
// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
-
-template<> struct trmv_selector<ColMajor>
+
+template<int Mode> struct trmv_selector<Mode,ColMajor>
{
- template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
- typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
- typedef typename ProductType::Index Index;
- typedef typename ProductType::LhsScalar LhsScalar;
- typedef typename ProductType::RhsScalar RhsScalar;
- typedef typename ProductType::Scalar ResScalar;
- typedef typename ProductType::RealScalar RealScalar;
- typedef typename ProductType::ActualLhsType ActualLhsType;
- typedef typename ProductType::ActualRhsType ActualRhsType;
- typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
- typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+ typedef typename Dest::Index Index;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+ typedef typename Dest::RealScalar RealScalar;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
- typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
- typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+ typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
- * RhsBlasTraits::extractScalarFactor(prod.rhs());
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -294,33 +278,33 @@ template<> struct trmv_selector<ColMajor>
}
};
-template<> struct trmv_selector<RowMajor>
+template<int Mode> struct trmv_selector<Mode,RowMajor>
{
- template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
- typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
- typedef typename ProductType::LhsScalar LhsScalar;
- typedef typename ProductType::RhsScalar RhsScalar;
- typedef typename ProductType::Scalar ResScalar;
- typedef typename ProductType::Index Index;
- typedef typename ProductType::ActualLhsType ActualLhsType;
- typedef typename ProductType::ActualRhsType ActualRhsType;
- typedef typename ProductType::_ActualRhsType _ActualRhsType;
- typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
- typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-
- typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
- typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
- * RhsBlasTraits::extractScalarFactor(prod.rhs());
+ typedef typename Dest::Index Index;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+ typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
enum {
- DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+ DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
};
- gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+ gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@@ -331,7 +315,7 @@ template<> struct trmv_selector<RowMajor>
Index size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
- Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+ Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
internal::triangular_matrix_vector_product
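
Note the signature change of trmv_selector: Mode moves from the member function template to the class template. That is what lets the right-triangular specialization above dispatch to the transposed kernel simply by instantiating the selector with a remapped mode, keeping the diagonal bits and flipping the triangle. The remapping as a standalone sketch (assumes C++11 for constexpr and static_assert):

    #include <Eigen/Core>

    // Sketch of the UpLo/diagonal remapping used when the triangular factor
    // sits on the right: keep (UnitDiag|ZeroDiag), flip Lower <-> Upper.
    constexpr int transposeTriangularMode(int mode)
    {
      return (mode & (Eigen::UnitDiag | Eigen::ZeroDiag))
           | ((mode & Eigen::Lower) ? Eigen::Upper : Eigen::Lower);
    }

    static_assert(transposeTriangularMode(Eigen::UnitLower) == int(Eigen::UnitUpper),
                  "the triangle flips while the diagonal flags are preserved");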
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index c4881b8da..3ec55fad2 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -334,7 +334,7 @@ struct blas_traits<Transpose<NestedXpr> >
enum {
IsTransposed = Base::IsTransposed ? 0 : 1
};
- static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 05107fdfe..9b40093f0 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -53,14 +53,13 @@ const int Infinity = -1;
const unsigned int RowMajorBit = 0x1;
/** \ingroup flags
- *
* means the expression should be evaluated by the calling expression */
const unsigned int EvalBeforeNestingBit = 0x2;
/** \ingroup flags
- *
+ * \deprecated
* means the expression should be evaluated before any assignment */
-const unsigned int EvalBeforeAssigningBit = 0x4;
+const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated
/** \ingroup flags
*
@@ -155,6 +154,16 @@ const unsigned int AlignedBit = 0x80;
const unsigned int NestByRefBit = 0x100;
+/** \ingroup flags
+ *
+ * for an expression, this means that the storage order
+ * can be either row-major or column-major.
+ * The precise choice will be decided at evaluation time or when
+ * combined with other expressions.
+ * \sa \ref RowMajorBit, \ref TopicStorageOrders */
+const unsigned int NoPreferredStorageOrderBit = 0x200;
+
+
// list of flags that are inherited by default
const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeNestingBit
@@ -413,10 +422,16 @@ namespace Architecture
Generic = 0x0,
SSE = 0x1,
AltiVec = 0x2,
+ VSX = 0x3,
+ NEON = 0x4,
#if defined EIGEN_VECTORIZE_SSE
Target = SSE
#elif defined EIGEN_VECTORIZE_ALTIVEC
Target = AltiVec
+#elif defined EIGEN_VECTORIZE_VSX
+ Target = VSX
+#elif defined EIGEN_VECTORIZE_NEON
+ Target = NEON
#else
Target = Generic
#endif
@@ -425,7 +440,7 @@ namespace Architecture
/** \internal \ingroup enums
* Enum used as template parameter in GeneralProduct. */
-enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+enum { DefaultProduct=0, CoeffBasedProductMode, LazyCoeffBasedProductMode, LazyProduct, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
/** \internal \ingroup enums
* Enum used in experimental parallel implementation. */
@@ -434,12 +449,38 @@ enum Action {GetAction, SetAction};
/** The type used to identify a dense storage. */
struct Dense {};
+/** The type used to identify a general sparse storage. */
+struct Sparse {};
+
+/** The type used to identify a permutation storage. */
+struct PermutationStorage {};
+
/** The type used to identify a matrix expression */
struct MatrixXpr {};
/** The type used to identify an array expression */
struct ArrayXpr {};
+// An evaluator must define its shape. By default, it can be one of the following:
+struct DenseShape { static std::string debugName() { return "DenseShape"; } };
+struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } };
+struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } };
+struct BandShape { static std::string debugName() { return "BandShape"; } };
+struct TriangularShape { static std::string debugName() { return "TriangularShape"; } };
+struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } };
+struct PermutationShape { static std::string debugName() { return "PermutationShape"; } };
+struct SparseShape { static std::string debugName() { return "SparseShape"; } };
+
+namespace internal {
+
+// indexed (random) access to coefficients via the coeff*() accessors.
+struct IndexBased {};
+
+// evaluator based on iterators to access coefficients.
+struct IteratorBased {};
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_CONSTANTS_H
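
The new *Shape tags give every evaluator a structural identity that product and assignment logic can dispatch on independently of the expression's concrete type; debugName() exists purely for diagnostics. A self-contained sketch of the dispatch idea (hypothetical kernel names, not Eigen internals):

    #include <iostream>

    struct DenseShape    {};
    struct DiagonalShape {};

    // Generic fallback: a full matrix-matrix kernel.
    template<typename LhsShape, typename RhsShape>
    struct product_kernel {
      static const char* name() { return "general GEMM-style product"; }
    };

    // Structure-aware specialization: a diagonal lhs only scales the rows of rhs.
    template<typename RhsShape>
    struct product_kernel<DiagonalShape, RhsShape> {
      static const char* name() { return "row-scaling product"; }
    };

    int main()
    {
      std::cout << product_kernel<DenseShape,    DenseShape>::name() << '\n';
      std::cout << product_kernel<DiagonalShape, DenseShape>::name() << '\n';
    }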
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 33deb88ec..c23892c50 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -36,6 +36,10 @@ template<typename Derived> struct accessors_level
};
};
+template<typename T> struct evaluator_traits;
+
+template< typename T> struct evaluator;
+
} // end namespace internal
template<typename T> struct NumTraits;
@@ -51,7 +55,7 @@ class DenseCoeffsBase;
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
-#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4
+#if EIGEN_GNUC_AT(3,4)
// workaround a bug in at least gcc 3.4.6
// the innermost ?: ternary operator is misparsed. We write it slightly
// differently and this makes gcc 3.4.6 happy, but it's ugly.
@@ -87,11 +91,19 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
-template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp;
-template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
-template<typename Lhs, typename Rhs> class Product;
-template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
-template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
+template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp; // TODO deprecated
+template<typename Derived, typename Lhs, typename Rhs> class ProductBase; // TODO deprecated
+template<typename Decomposition, typename Rhstype> class Solve;
+template<typename XprType> class Inverse;
+
+namespace internal {
+ template<typename Lhs, typename Rhs> struct product_tag;
+}
+
+template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
+
+template<typename Lhs, typename Rhs, int Mode> class GeneralProduct; // TODO deprecated
+template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct; // TODO deprecated
template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper;
@@ -109,7 +121,12 @@ template<typename Derived,
int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class MapBase;
template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<int Value = Dynamic> class InnerStride;
+template<int Value = Dynamic> class OuterStride;
template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
+template<typename Derived> class RefBase;
+template<typename PlainObjectType, int Options = 0,
+ typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
template<typename Derived> class TriangularBase;
template<typename MatrixType, unsigned int Mode> class TriangularView;
@@ -120,10 +137,9 @@ template<typename MatrixType> struct CommaInitializer;
template<typename Derived> class ReturnByValue;
template<typename ExpressionType> class ArrayWrapper;
template<typename ExpressionType> class MatrixWrapper;
+template<typename XprType> class InnerIterator;
namespace internal {
-template<typename DecompositionType, typename Rhs> struct solve_retval_base;
-template<typename DecompositionType, typename Rhs> struct solve_retval;
template<typename DecompositionType> struct kernel_retval_base;
template<typename DecompositionType> struct kernel_retval;
template<typename DecompositionType> struct image_retval_base;
@@ -136,6 +152,18 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
namespace internal {
template<typename Lhs, typename Rhs> struct product_type;
+/** \internal
+ * \class product_evaluator
+ * Products need their own evaluator with more template arguments allowing for
+ * easier partial template specializations.
+ */
+template< typename T,
+ int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
+ typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
+ typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+ typename RhsScalar = typename traits<typename T::Rhs>::Scalar
+ > struct product_evaluator;
}
template<typename Lhs, typename Rhs,
@@ -193,7 +221,7 @@ struct IOFormat;
// Array module
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
-#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4
+#if EIGEN_GNUC_AT(3,4)
// workaround a bug in at least gcc 3.4.6
// the innermost ?: ternary operator is misparsed. We write it slightly
// differently and this makes gcc 3.4.6 happy, but it's ugly.
@@ -223,6 +251,7 @@ template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
+template<typename MatrixType> class BDCSVD;
template<typename MatrixType, int UpLo = Lower> class LLT;
template<typename MatrixType, int UpLo = Lower> class LDLT;
template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
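
The point of handing product_evaluator the product tag, both operand shapes, and both scalar types as defaulted template parameters is that a partial specialization can pin down any subset of them while the rest stay generic. The pattern, reduced to a self-contained sketch (all names hypothetical):

    struct Dense {};
    struct Diag  {};

    // Primary template: defaults are extracted from the expression type.
    template<typename Xpr,
             int Tag           = Xpr::DefaultTag,
             typename LhsShape = typename Xpr::LhsShape,
             typename RhsShape = typename Xpr::RhsShape>
    struct product_eval_sketch {
      static const char* path() { return "generic"; }
    };

    // Specialize on the shapes only; Tag stays a free parameter.
    template<typename Xpr, int Tag>
    struct product_eval_sketch<Xpr, Tag, Diag, Dense> {
      static const char* path() { return "diagonal-times-dense"; }
    };

    struct MyXpr { static const int DefaultTag = 0; typedef Diag LhsShape; typedef Dense RhsShape; };

    int main() { return product_eval_sketch<MyXpr>::path()[0] == 'd' ? 0 : 1; }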
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 40a28d4d6..dc5f13673 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -18,31 +18,280 @@
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
EIGEN_MINOR_VERSION>=z))))
+
+// Compiler identification, EIGEN_COMP_*
+
+/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC
#ifdef __GNUC__
- #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+ #define EIGEN_COMP_GNUC 1
#else
- #define EIGEN_GNUC_AT_LEAST(x,y) 0
+ #define EIGEN_COMP_GNUC 0
#endif
-
-#ifdef __GNUC__
- #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+
+/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__)
+#if defined(__clang__)
+ #define EIGEN_COMP_CLANG 1
#else
- #define EIGEN_GNUC_AT_MOST(x,y) 0
+ #define EIGEN_COMP_CLANG 0
#endif
-#if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__)
- // see bug 89
- #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+
+/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm
+#if defined(__llvm__)
+ #define EIGEN_COMP_LLVM 1
#else
- #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+ #define EIGEN_COMP_LLVM 0
+#endif
+
+/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise
+#if defined(__INTEL_COMPILER)
+ #define EIGEN_COMP_ICC __INTEL_COMPILER
+#else
+ #define EIGEN_COMP_ICC 0
+#endif
+
+/// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw
+#if defined(__MINGW32__)
+ #define EIGEN_COMP_MINGW 1
+#else
+ #define EIGEN_COMP_MINGW 0
+#endif
+
+/// \internal EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio
+#if defined(__SUNPRO_CC)
+ #define EIGEN_COMP_SUNCC 1
+#else
+ #define EIGEN_COMP_SUNCC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise.
+#if defined(_MSC_VER)
+ #define EIGEN_COMP_MSVC _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC_STRICT set to _MSC_VER if the compiler is really Microsoft Visual C++ and not, e.g., ICC, 0 otherwise
+#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC)
+ #define EIGEN_COMP_MSVC_STRICT _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC_STRICT 0
+#endif
+
+/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++
+#if defined(__IBMCPP__) || defined(__xlc__)
+ #define EIGEN_COMP_IBM 1
+#else
+ #define EIGEN_COMP_IBM 0
+#endif
+
+/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler
+#if defined(__PGI)
+ #define EIGEN_COMP_PGI 1
+#else
+ #define EIGEN_COMP_PGI 0
+#endif
+
+/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
+#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
+ #define EIGEN_COMP_ARM 1
+#else
+ #define EIGEN_COMP_ARM 0
+#endif
+
+
+/// \internal EIGEN_COMP_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
+ #define EIGEN_COMP_GNUC_STRICT 1
+#else
+ #define EIGEN_COMP_GNUC_STRICT 0
#endif
-#if defined(__GNUC__) && (__GNUC__ <= 3)
+
+#if EIGEN_COMP_GNUC
+ #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+ #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+ #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y )
+#else
+ #define EIGEN_GNUC_AT_LEAST(x,y) 0
+ #define EIGEN_GNUC_AT_MOST(x,y) 0
+ #define EIGEN_GNUC_AT(x,y) 0
+#endif
+
+// FIXME: could probably be removed as we do not support gcc 3.x anymore
+#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
#define EIGEN_GCC3_OR_OLDER 1
#else
#define EIGEN_GCC3_OR_OLDER 0
#endif
+
+// Architecture identification, EIGEN_ARCH_*
+
+/// \internal EIGEN_ARCH_x86_64 set to 1 if the architecture is x86_64
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+ #define EIGEN_ARCH_x86_64 1
+#else
+ #define EIGEN_ARCH_x86_64 0
+#endif
+
+/// \internal EIGEN_ARCH_i386 set to 1 if the architecture is i386
+#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
+ #define EIGEN_ARCH_i386 1
+#else
+ #define EIGEN_ARCH_i386 0
+#endif
+
+#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
+ #define EIGEN_ARCH_i386_OR_x86_64 1
+#else
+ #define EIGEN_ARCH_i386_OR_x86_64 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM
+#if defined(__arm__)
+ #define EIGEN_ARCH_ARM 1
+#else
+ #define EIGEN_ARCH_ARM 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64
+#if defined(__aarch64__)
+ #define EIGEN_ARCH_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM64 0
+#endif
+
+#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
+ #define EIGEN_ARCH_ARM_OR_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM_OR_ARM64 0
+#endif
+
+/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS
+#if defined(__mips__) || defined(__mips)
+ #define EIGEN_ARCH_MIPS 1
+#else
+ #define EIGEN_ARCH_MIPS 0
+#endif
+
+/// \internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC
+#if defined(__sparc__) || defined(__sparc)
+ #define EIGEN_ARCH_SPARC 1
+#else
+ #define EIGEN_ARCH_SPARC 0
+#endif
+
+/// \internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium
+#if defined(__ia64__)
+ #define EIGEN_ARCH_IA64 1
+#else
+ #define EIGEN_ARCH_IA64 0
+#endif
+
+/// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+ #define EIGEN_ARCH_PPC 1
+#else
+ #define EIGEN_ARCH_PPC 0
+#endif
+
+
+
+// Operating system identification, EIGEN_OS_*
+
+/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant
+#if defined(__unix__) || defined(__unix)
+ #define EIGEN_OS_UNIX 1
+#else
+ #define EIGEN_OS_UNIX 0
+#endif
+
+/// \internal EIGEN_OS_LINUX set to 1 if the OS is based on the Linux kernel
+#if defined(__linux__)
+ #define EIGEN_OS_LINUX 1
+#else
+ #define EIGEN_OS_LINUX 0
+#endif
+
+/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
+#if defined(__ANDROID__)
+ #define EIGEN_OS_ANDROID 1
+#else
+ #define EIGEN_OS_ANDROID 0
+#endif
+
+/// \internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU/Linux and not some other Linux-based OS (e.g., not Android)
+#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
+ #define EIGEN_OS_GNULINUX 1
+#else
+ #define EIGEN_OS_GNULINUX 0
+#endif
+
+/// \internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
+ #define EIGEN_OS_BSD 1
+#else
+ #define EIGEN_OS_BSD 0
+#endif
+
+/// \internal EIGEN_OS_MAC set to 1 if the OS is MacOS
+#if defined(__APPLE__)
+ #define EIGEN_OS_MAC 1
+#else
+ #define EIGEN_OS_MAC 0
+#endif
+
+/// \internal EIGEN_OS_QNX set to 1 if the OS is QNX
+#if defined(__QNX__)
+ #define EIGEN_OS_QNX 1
+#else
+ #define EIGEN_OS_QNX 0
+#endif
+
+/// \internal EIGEN_OS_WIN set to 1 if the OS is Windows based
+#if defined(_WIN32)
+ #define EIGEN_OS_WIN 1
+#else
+ #define EIGEN_OS_WIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN64 set to 1 if the OS is 64-bit Windows
+#if defined(_WIN64)
+ #define EIGEN_OS_WIN64 1
+#else
+ #define EIGEN_OS_WIN64 0
+#endif
+
+/// \internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE
+#if defined(_WIN32_WCE)
+ #define EIGEN_OS_WINCE 1
+#else
+ #define EIGEN_OS_WINCE 0
+#endif
+
+/// \internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin
+#if defined(__CYGWIN__)
+ #define EIGEN_OS_CYGWIN 1
+#else
+ #define EIGEN_OS_CYGWIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not a variant (e.g., WinCE or Cygwin)
+#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
+ #define EIGEN_OS_WIN_STRICT 1
+#else
+ #define EIGEN_OS_WIN_STRICT 0
+#endif
+
+
+
+
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+ // see bug 89
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+#endif
+
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
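
Taken together, the EIGEN_COMP_*, EIGEN_ARCH_* and EIGEN_OS_* macros replace scattered defined(__GNUC__) / defined(_MSC_VER) tests with symbols that are always defined, either to 0/1 or to a version number, so they compose freely inside a single #if. An illustrative before/after:

    // Old style: only meaningful when _MSC_VER happens to be defined.
    //   #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    // New style: EIGEN_COMP_MSVC is 0 on every other compiler, so this is always valid.
    #if EIGEN_COMP_MSVC >= 1600
      // MSVC 2010 or newer
    #elif EIGEN_COMP_GNUC_STRICT && EIGEN_ARCH_ARM_OR_ARM64
      // strictly-GCC-on-ARM code path
    #endif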
@@ -50,7 +299,7 @@
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
// when we have to disable static alignment.
-#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__))
+#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
@@ -59,8 +308,8 @@
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
- && !defined(__SUNPRO_CC) \
- && !defined(__QNXNTO__)
+ && !EIGEN_COMP_SUNCC \
+ && !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
@@ -86,6 +335,11 @@
#define EIGEN_ALIGN 0
#endif
+
+// This macro can be used to prevent macro expansion, e.g.:
+// std::max EIGEN_NOT_A_MACRO(a,b)
+#define EIGEN_NOT_A_MACRO
+
// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
@@ -124,7 +378,7 @@
#if (__has_feature(cxx_rvalue_references) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \
defined(__GXX_EXPERIMENTAL_CXX0X__) || \
- (defined(_MSC_VER) && _MSC_VER >= 1600))
+ (EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAVE_RVALUE_REFERENCES
#endif
@@ -161,7 +415,7 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
-#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
+#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
@@ -180,15 +434,15 @@
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
#endif
-#if (defined __GNUC__)
+#if EIGEN_COMP_GNUC
#define EIGEN_DONT_INLINE __attribute__((noinline))
-#elif (defined _MSC_VER)
+#elif EIGEN_COMP_MSVC
#define EIGEN_DONT_INLINE __declspec(noinline)
#else
#define EIGEN_DONT_INLINE
#endif
-#if (defined __GNUC__)
+#if EIGEN_COMP_GNUC
#define EIGEN_PERMISSIVE_EXPR __extension__
#else
#define EIGEN_PERMISSIVE_EXPR
@@ -257,15 +511,15 @@
#endif
#ifdef EIGEN_NO_DEBUG
-#define EIGEN_ONLY_USED_FOR_DEBUG(x) (void)x
+#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)
#else
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
#endif
#ifndef EIGEN_NO_DEPRECATED_WARNING
- #if (defined __GNUC__)
+ #if EIGEN_COMP_GNUC
#define EIGEN_DEPRECATED __attribute__((deprecated))
- #elif (defined _MSC_VER)
+ #elif EIGEN_COMP_MSVC
#define EIGEN_DEPRECATED __declspec(deprecated)
#else
#define EIGEN_DEPRECATED
@@ -274,7 +528,7 @@
#define EIGEN_DEPRECATED
#endif
-#if (defined __GNUC__)
+#if EIGEN_COMP_GNUC
#define EIGEN_UNUSED __attribute__((unused))
#else
#define EIGEN_UNUSED
@@ -283,13 +537,13 @@
// Suppresses 'unused variable' warnings.
namespace Eigen {
namespace internal {
- template<typename T> void ignore_unused_variable(const T&) {}
+ template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {}
}
}
#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
#if !defined(EIGEN_ASM_COMMENT)
- #if (defined __GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
+ #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
#define EIGEN_ASM_COMMENT(X) __asm__("#" X)
#else
#define EIGEN_ASM_COMMENT(X)
@@ -304,12 +558,12 @@ namespace Eigen {
* vectorized and non-vectorized code.
*/
#if (defined __CUDACC__)
-#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
-#elif (defined __GNUC__) || (defined __PGI) || (defined __IBMCPP__) || (defined __ARMCC_VERSION)
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
-#elif (defined _MSC_VER)
+#elif EIGEN_COMP_MSVC
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
-#elif (defined __SUNPRO_CC)
+#elif EIGEN_COMP_SUNCC
// FIXME not sure about this one:
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#else
@@ -357,27 +611,26 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
-#if defined(_MSC_VER) && (!defined(__INTEL_COMPILER))
-#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
- using Base::operator =;
-#elif defined(__clang__) // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
-#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
- using Base::operator =; \
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
- template <typename OtherDerived> \
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
-#else
-#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
- using Base::operator =; \
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
- { \
- Base::operator=(other); \
- return *this; \
- }
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =;
+#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+ template <typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
+#else
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
+ { \
+ Base::operator=(other); \
+ return *this; \
+ }
#endif
-#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
- EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
/**
* Just a side note. Commenting within defines works only by documenting
@@ -387,6 +640,8 @@ namespace Eigen {
* documentation in a single line.
**/
+// TODO The EIGEN_DENSE_PUBLIC_INTERFACE should not exist anymore
+
#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \
@@ -397,7 +652,6 @@ namespace Eigen {
enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
Flags = Eigen::internal::traits<Derived>::Flags, \
- CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
IsVectorAtCompileTime = Base::IsVectorAtCompileTime };
@@ -416,13 +670,11 @@ namespace Eigen {
MaxRowsAtCompileTime = Eigen::internal::traits<Derived>::MaxRowsAtCompileTime, \
MaxColsAtCompileTime = Eigen::internal::traits<Derived>::MaxColsAtCompileTime, \
Flags = Eigen::internal::traits<Derived>::Flags, \
- CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
using Base::derived; \
- using Base::const_cast_derived;
-
+ using Base::const_cast_derived;
#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 30133ba67..bacf236fb 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
@@ -42,15 +42,15 @@
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
-#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) && (EIGEN_ALIGN_BYTES == 16)
+#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16)
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif
-#if (defined(__APPLE__) && (EIGEN_ALIGN_BYTES == 16)) \
- || (defined(_WIN64) && (EIGEN_ALIGN_BYTES == 16)) \
- || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
+#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \
+ || (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \
+ || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
@@ -62,9 +62,9 @@
// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
// Currently, let's include it only on unix systems:
-#if defined(__unix__) || defined(__unix)
+#if EIGEN_OS_UNIX
#include <unistd.h>
- #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || (defined __PGI) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+ #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
#endif
#endif
@@ -224,7 +224,7 @@ inline void* aligned_malloc(size_t size)
if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
- #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
+ #elif EIGEN_OS_WIN_STRICT
result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
#else
result = handmade_aligned_malloc(size);
@@ -247,7 +247,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
+ #elif EIGEN_OS_WIN_STRICT
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -274,12 +274,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
// The defined(_mm_free) is just here to verify that this MSVC version
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
- #if defined(_MSC_VER) && (!defined(_WIN32_WCE)) && defined(_mm_free)
+ #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
-#elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
+#elif EIGEN_OS_WIN_STRICT
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
@@ -454,6 +454,8 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt
template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size)
{
+ if(size==0)
+ return 0; // short-cut. Also fixes Bug 884
check_size_for_overflow<T>(size);
T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
if(NumTraits<T>::RequireInitialization)
@@ -521,9 +523,8 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
inline Index first_aligned(const Scalar* array, Index size)
{
- enum { PacketSize = packet_traits<Scalar>::size,
- PacketAlignedMask = PacketSize-1
- };
+ static const Index PacketSize = packet_traits<Scalar>::size;
+ static const Index PacketAlignedMask = PacketSize-1;
if(PacketSize==1)
{
@@ -576,27 +577,27 @@ template<typename T, bool UseMemmove> struct smart_memmove_helper;
template<typename T> void smart_memmove(const T* start, const T* end, T* target)
{
- smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+ smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}
template<typename T> struct smart_memmove_helper<T,true> {
- static inline void run(const T* start, const T* end, T* target)
- { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+ static inline void run(const T* start, const T* end, T* target)
+ { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
};
template<typename T> struct smart_memmove_helper<T,false> {
- static inline void run(const T* start, const T* end, T* target)
- {
- if (uintptr_t(target) < uintptr_t(start))
- {
- std::copy(start, end, target);
- }
- else
- {
- std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
- std::copy_backward(start, end, target + count);
- }
+ static inline void run(const T* start, const T* end, T* target)
+ {
+ if (uintptr_t(target) < uintptr_t(start))
+ {
+ std::copy(start, end, target);
}
+ else
+ {
+ std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
+ std::copy_backward(start, end, target + count);
+ }
+ }
};
@@ -607,16 +608,16 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
#ifndef EIGEN_ALLOCA
- #if (defined __linux__) || (defined __APPLE__) || (defined alloca)
+ #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca
- #elif defined(_MSC_VER)
+ #elif EIGEN_COMP_MSVC
#define EIGEN_ALLOCA _alloca
#endif
#endif
// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
-template<typename T> class aligned_stack_memory_handler
+template<typename T> class aligned_stack_memory_handler : noncopyable
{
public:
/* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
@@ -644,6 +645,30 @@ template<typename T> class aligned_stack_memory_handler
bool m_deallocate;
};
+template<typename T> class scoped_array : noncopyable
+{
+ T* m_ptr;
+public:
+ explicit scoped_array(std::ptrdiff_t size)
+ {
+ m_ptr = new T[size];
+ }
+ ~scoped_array()
+ {
+ delete[] m_ptr;
+ }
+ T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
+ const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
+ T* &ptr() { return m_ptr; }
+ const T* ptr() const { return m_ptr; }
+ operator const T*() const { return m_ptr; }
+};
+
+template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
+{
+ std::swap(a.ptr(),b.ptr());
+}
+
} // end namespace internal
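
scoped_array is a minimal noncopyable owner of a new T[] allocation: the elements are delete[]'d at scope exit (including during exception unwinding), and because ptr() is exposed by non-const reference, the free swap() exchanges two arrays in O(1) by swapping the owned pointers. A usage sketch (internal API; assumes <Eigen/Core> has been included):

    #include <Eigen/Core>
    #include <cstddef>

    void demo(std::ptrdiff_t n)
    {
      Eigen::internal::scoped_array<double> a(n), b(n);
      for (std::ptrdiff_t i = 0; i < n; ++i) { a[i] = double(i); b[i] = -double(i); }
      swap(a, b);  // found by ADL; exchanges only the two owned pointers
    }              // both arrays are delete[]'d here, even if an exception unwinds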
/** \internal
@@ -786,12 +811,12 @@ public:
//---------- Cache sizes ----------
#if !defined(EIGEN_NO_CPUID)
-# if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
-# if defined(__PIC__) && defined(__i386__)
+# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
+# if defined(__PIC__) && EIGEN_ARCH_i386
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
-# elif defined(__PIC__) && defined(__x86_64__)
+# elif defined(__PIC__) && EIGEN_ARCH_x86_64
// Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
// However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
# define EIGEN_CPUID(abcd,func,id) \
@@ -801,8 +826,8 @@ public:
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
-# elif defined(_MSC_VER)
-# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
+# elif EIGEN_COMP_MSVC
+# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
# endif
# endif
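With one of the EIGEN_CPUID variants above in effect, querying the CPU is uniform across compilers. A hypothetical sketch reading the vendor string (leaf 0 returns it in EBX, EDX, ECX; read_vendor is an illustrative name):

#include <cstring>
void read_vendor(char vendor[13])
{
#ifdef EIGEN_CPUID
  int abcd[4] = {0, 0, 0, 0};
  EIGEN_CPUID(abcd, 0x0, 0);            // abcd = {EAX, EBX, ECX, EDX}
  std::memcpy(vendor + 0, &abcd[1], 4); // EBX
  std::memcpy(vendor + 4, &abcd[3], 4); // EDX
  std::memcpy(vendor + 8, &abcd[2], 4); // ECX
  vendor[12] = '\0';
#endif
}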
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index b99b8849e..f3bafd5af 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -274,18 +274,6 @@ template<typename T> struct scalar_product_traits<std::complex<T>, T>
// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
// };
-template<typename T> struct is_diagonal
-{ enum { ret = false }; };
-
-template<typename T> struct is_diagonal<DiagonalBase<T> >
-{ enum { ret = true }; };
-
-template<typename T> struct is_diagonal<DiagonalWrapper<T> >
-{ enum { ret = true }; };
-
-template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
-{ enum { ret = true }; };
-
} // end namespace internal
namespace numext {
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 59aa0811c..7538a0633 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -26,7 +26,7 @@
#ifndef EIGEN_NO_STATIC_ASSERT
- #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && (_MSC_VER >= 1600))
+ #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600)
// if native static_assert is enabled, let's use it
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
@@ -84,13 +84,16 @@
THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+ THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
- OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+ IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
+ STORAGE_LAYOUT_DOES_NOT_MATCH
};
};
@@ -101,7 +104,7 @@
// Specialized implementation for MSVC to avoid "conditional
// expression is constant" warnings. This implementation doesn't
// appear to work under GCC, hence the multiple implementations.
- #ifdef _MSC_VER
+ #if EIGEN_COMP_MSVC
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
{Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
@@ -157,7 +160,7 @@
#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
( \
- (int(TYPE0::SizeAtCompileTime)==0 && int(TYPE1::SizeAtCompileTime)==0) \
+ (int(internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
|| (\
(int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
|| int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 67ca49754..3ac37df58 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -14,7 +14,7 @@
// just a workaround because GCC seems to not really like empty structs
// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled
// so currently we simply disable this optimization for gcc 4.3
-#if (defined __GNUG__) && !((__GNUC__==4) && (__GNUC_MINOR__==3))
+#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
#define EIGEN_EMPTY_STRUCT_CTOR(X) \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
@@ -128,6 +128,17 @@ template<typename _Scalar, int _Rows, int _Cols,
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_flags
{
+ enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+ public:
+ // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
+ // and then propagate this information to the evaluator's flags.
+ // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
+ enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_evaluator_flags
+{
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
@@ -156,7 +167,7 @@ class compute_matrix_flags
};
public:
- enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
+ enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
};
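The split means a plain matrix now advertises only storage-related bits at the expression level, while linear-access and packet information is computed on its evaluator. A sketch, assuming the internal names as they appear in this patch:

// expression-level: DirectAccessBit | LvalueBit | NestByRefBit (| RowMajorBit)
enum { XprFlags  = Eigen::internal::traits<Eigen::Matrix4f>::Flags };
// evaluator-level: LinearAccessBit | DirectAccessBit | packet/aligned bits
enum { EvalFlags = Eigen::internal::evaluator<Eigen::Matrix4f>::Flags };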
template<int _Rows, int _Cols> struct size_at_compile_time
@@ -164,6 +175,11 @@ template<int _Rows, int _Cols> struct size_at_compile_time
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
};
+template<typename XprType> struct size_of_xpr_at_compile_time
+{
+ enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
+};
+
/* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type,
* whereas eval is a const reference in the case of a matrix
*/
@@ -174,6 +190,10 @@ template<typename T> struct plain_matrix_type<T,Dense>
{
typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind>::type type;
};
+template<typename T> struct plain_matrix_type<T,DiagonalShape>
+{
+ typedef typename T::PlainObject type;
+};
template<typename T> struct plain_matrix_type_dense<T,MatrixXpr>
{
@@ -216,6 +236,11 @@ template<typename T> struct eval<T,Dense>
// > type;
};
+template<typename T> struct eval<T,DiagonalShape>
+{
+ typedef typename plain_matrix_type<T>::type type;
+};
+
// for matrices, no need to evaluate, just use a const reference to avoid a useless copy
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
@@ -294,38 +319,42 @@ struct transfer_constness
>::type type;
};
-/** \internal Determines how a given expression should be nested into another one.
+
+// When using evaluators, we never evaluate when assembling the expression!!
+// TODO: get rid of this nested class since it's just an alias for ref_selector.
+template<typename T, int n=1, typename PlainObject = void> struct nested
+{
+ typedef typename ref_selector<T>::type type;
+};
+
+// However, we still need a mechanism to detect whether an expression which is evaluated multiple times
+// has to be evaluated into a temporary.
+// That's the purpose of this new nested_eval helper:
+/** \internal Determines how a given expression should be nested when evaluated multiple times.
* For example, when you do a * (b+c), Eigen will determine how the expression b+c should be
- * nested into the bigger product expression. The choice is between nesting the expression b+c as-is, or
+ * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or
* evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is
* a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
* many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
*
- * \param T the type of the expression being nested
+ * \param T the type of the expression being nested.
* \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
- *
- * Note that if no evaluation occur, then the constness of T is preserved.
- *
- * Example. Suppose that a, b, and c are of type Matrix3d. The user forms the expression a*(b+c).
- * b+c is an expression "sum of matrices", which we will denote by S. In order to determine how to nest it,
- * the Product expression uses: nested<S, 3>::type, which turns out to be Matrix3d because the internal logic of
- * nested determined that in this case it was better to evaluate the expression b+c into a temporary. On the other hand,
- * since a is of type Matrix3d, the Product expression nests it as nested<Matrix3d, 3>::type, which turns out to be
- * const Matrix3d&, because the internal logic of nested determined that since a was already a matrix, there was no point
- * in copying it into another matrix.
+ * \param PlainObject the type of the temporary if needed.
*/
-template<typename T, int n=1, typename PlainObject = typename eval<T>::type> struct nested
+template<typename T, int n, typename PlainObject = typename eval<T>::type> struct nested_eval
{
enum {
- // for the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values.
+    // For the purpose of this test, to keep it reasonably simple, we arbitrarily pick a numeric stand-in for Dynamic.
// the choice of 10000 makes it larger than any practical fixed value and even most dynamic values.
// in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues
// (poor choice of temporaries).
- // it's important that this value can still be squared without integer overflowing.
+ // It's important that this value can still be squared without integer overflowing.
DynamicAsInteger = 10000,
ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost),
- CoeffReadCost = traits<T>::CoeffReadCost,
+    CoeffReadCost = evaluator<T>::CoeffReadCost,  // TODO What if an evaluator evaluates itself into a temporary?
+ // Then CoeffReadCost will be small but we still have to evaluate if n>1...
+ // The solution might be to ask the evaluator if it creates a temp. Perhaps we could even ask the number of temps?
CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost),
NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n,
CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger,
@@ -333,17 +362,16 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str
};
typedef typename conditional<
- ( (int(traits<T>::Flags) & EvalBeforeNestingBit) ||
- int(CostEvalAsInteger) < int(CostNoEvalAsInteger)
- ),
- PlainObject,
- typename ref_selector<T>::type
+ ( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
+ (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ),
+ PlainObject,
+ typename ref_selector<T>::type
>::type type;
};
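Concretely, for a 3x3 product each coefficient of a compound operand is read several times, so nested_eval picks a temporary for it while a plain matrix is nested by reference; a small illustration:

Eigen::Matrix3d a, b, c;
a.setRandom(); b.setRandom(); c.setRandom();
// b + c is evaluated once into a temporary, a is referenced directly
Eigen::Matrix3d r = a * (b + c);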
template<typename T>
EIGEN_DEVICE_FUNC
-T* const_cast_ptr(const T* ptr)
+inline T* const_cast_ptr(const T* ptr)
{
return const_cast<T*>(ptr);
}
@@ -366,6 +394,15 @@ struct dense_xpr_base<Derived, ArrayXpr>
typedef ArrayBase<Derived> type;
};
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
+struct generic_xpr_base;
+
+template<typename Derived, typename XprKind>
+struct generic_xpr_base<Derived, XprKind, Dense>
+{
+ typedef typename dense_xpr_base<Derived,XprKind>::type type;
+};
+
/** \internal Helper base class to add a scalar multiple operator
* overloads for complex types */
template<typename Derived,typename Scalar,typename OtherScalar,
@@ -424,6 +461,60 @@ template <typename A> struct promote_storage_type<const A, A>
typedef A ret;
};
+/** \internal Specify the "storage kind" of applying a coefficient-wise
+ * binary operations between two expressions of kinds A and B respectively.
+ * The template parameter Functor allows specializing the resulting storage kind with respect to
+ * the functor.
+ * The default rules are as follows:
+ * \code
+ * A op A -> A
+ * A op dense -> dense
+ * dense op B -> dense
+ * A * dense -> A
+ * dense * B -> B
+ * \endcode
+ */
+template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
+
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
+template <typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef Dense ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
+template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
+template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef A ret; };
+template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> > { typedef B ret; };
+
+/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
+ * The template parameter ProductTag allows specializing the resulting storage kind with respect to
+ * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct.
+ * The default rules are as follows:
+ * \code
+ * K * K -> K
+ * dense * K -> dense
+ * K * dense -> dense
+ * diag * K -> K
+ * K * diag -> K
+ * Perm * K -> K
+ * K * Perm -> K
+ * \endcode
+ */
+template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A, A, ProductTag> { typedef A ret;};
+template <int ProductTag> struct product_promote_storage_type<Dense, Dense, ProductTag> { typedef Dense ret;};
+template <typename A, int ProductTag> struct product_promote_storage_type<A, Dense, ProductTag> { typedef Dense ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<Dense, B, ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A, DiagonalShape, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape, B, ProductTag> { typedef B ret; };
+template <int ProductTag> struct product_promote_storage_type<Dense, DiagonalShape, ProductTag> { typedef Dense ret; };
+template <int ProductTag> struct product_promote_storage_type<DiagonalShape, Dense, ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A, PermutationStorage, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B, ProductTag> { typedef B ret; };
+template <int ProductTag> struct product_promote_storage_type<Dense, PermutationStorage, ProductTag> { typedef Dense ret; };
+template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
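At the user level these promotion rules simply say that mixing a structured operand with a dense one yields a dense result; for example:

Eigen::VectorXd d = Eigen::VectorXd::Random(3);
Eigen::Matrix3d M = Eigen::Matrix3d::Random();
Eigen::Matrix3d R1 = d.asDiagonal() * M;   // diag * dense -> dense
Eigen::PermutationMatrix<3> P; P.setIdentity();
Eigen::Matrix3d R2 = P * M;                // perm * dense -> dense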
+
/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
* \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
*/
@@ -480,8 +571,36 @@ struct is_lvalue
bool(traits<ExpressionType>::Flags & LvalueBit) };
};
+template<typename T> struct is_diagonal
+{ enum { ret = false }; };
+
+template<typename T> struct is_diagonal<DiagonalBase<T> >
+{ enum { ret = true }; };
+
+template<typename T> struct is_diagonal<DiagonalWrapper<T> >
+{ enum { ret = true }; };
+
+template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
+{ enum { ret = true }; };
+
+template<typename S1, typename S2> struct glue_shapes;
+template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
+
} // end namespace internal
+// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
+// that would take two operands of different types. If there were such an example, then this check should be
+// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
+// currently they take only one typename Scalar template parameter.
+// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
+// add together a float matrix and a double matrix.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+ EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
+ ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
+ : int(internal::is_same<LHS, RHS>::value)), \
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
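For illustration, the assertion fires on code like the following, and the fix is the explicit cast named in the message:

Eigen::MatrixXf F = Eigen::MatrixXf::Random(2,2);
Eigen::MatrixXd D = Eigen::MatrixXd::Random(2,2);
// Eigen::MatrixXd bad = F + D;             // static assertion: YOU_MIXED_DIFFERENT_NUMERIC_TYPES...
Eigen::MatrixXd ok = F.cast<double>() + D;  // cast first, then add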
+
} // end namespace Eigen
#endif // EIGEN_XPRHELPER_H
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index af434bc9b..25082546e 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -104,7 +104,7 @@ template<typename _MatrixType> class ComplexEigenSolver
* according to the specified problem \a size.
* \sa ComplexEigenSolver()
*/
- ComplexEigenSolver(Index size)
+ explicit ComplexEigenSolver(Index size)
: m_eivec(size, size),
m_eivalues(size),
m_schur(size),
@@ -122,7 +122,7 @@ template<typename _MatrixType> class ComplexEigenSolver
*
* This constructor calls compute() to compute the eigendecomposition.
*/
- ComplexEigenSolver(const MatrixType& matrix, bool computeEigenvectors = true)
+ explicit ComplexEigenSolver(const MatrixType& matrix, bool computeEigenvectors = true)
: m_eivec(matrix.rows(),matrix.cols()),
m_eivalues(matrix.cols()),
m_schur(matrix.rows()),
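The practical effect of the added explicit keywords, here and in the other solvers below, is that copy-initialization from a matrix no longer triggers a silent (and expensive) decomposition:

Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(4,4);
Eigen::ComplexEigenSolver<Eigen::MatrixXcd> es(A);     // OK: direct initialization
// Eigen::ComplexEigenSolver<Eigen::MatrixXcd> es2 = A; // no longer compiles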
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 89e6cade3..a3a5a4649 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -91,7 +91,7 @@ template<typename _MatrixType> class ComplexSchur
*
* \sa compute() for an example.
*/
- ComplexSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)
+ explicit ComplexSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)
: m_matT(size,size),
m_matU(size,size),
m_hess(size),
@@ -109,7 +109,7 @@ template<typename _MatrixType> class ComplexSchur
*
* \sa matrixT() and matrixU() for examples.
*/
- ComplexSchur(const MatrixType& matrix, bool computeU = true)
+ explicit ComplexSchur(const MatrixType& matrix, bool computeU = true)
: m_matT(matrix.rows(),matrix.cols()),
m_matU(matrix.rows(),matrix.cols()),
m_hess(matrix.rows()),
diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
index 91496ae5b..27aed923c 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
@@ -45,7 +45,6 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
- typedef MatrixType::Scalar Scalar; \
typedef MatrixType::RealScalar RealScalar; \
typedef std::complex<RealScalar> ComplexScalar; \
\
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index d2563d470..9372021ff 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -118,7 +118,7 @@ template<typename _MatrixType> class EigenSolver
* according to the specified problem \a size.
* \sa EigenSolver()
*/
- EigenSolver(Index size)
+ explicit EigenSolver(Index size)
: m_eivec(size, size),
m_eivalues(size),
m_isInitialized(false),
@@ -143,7 +143,7 @@ template<typename _MatrixType> class EigenSolver
*
* \sa compute()
*/
- EigenSolver(const MatrixType& matrix, bool computeEigenvectors = true)
+ explicit EigenSolver(const MatrixType& matrix, bool computeEigenvectors = true)
: m_eivec(matrix.rows(), matrix.cols()),
m_eivalues(matrix.cols()),
m_isInitialized(false),
@@ -368,7 +368,6 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
using std::sqrt;
using std::abs;
- using std::max;
using numext::isfinite;
eigen_assert(matrix.cols() == matrix.rows());
@@ -409,7 +408,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
Scalar t0 = m_matT.coeff(i+1, i);
Scalar t1 = m_matT.coeff(i, i+1);
- Scalar maxval = (max)(abs(p),(max)(abs(t0),abs(t1)));
+ Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1)));
t0 /= maxval;
t1 /= maxval;
Scalar p0 = p/maxval;
@@ -600,8 +599,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
}
// Overflow control
- EIGEN_USING_STD_MATH(max);
- Scalar t = (max)(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
+ Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
if ((eps * t) * t > Scalar(1))
m_matT.block(i, n-1, size-i, 2) /= t;
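numext::maxi, as used throughout this patch, is a plain function rather than a parenthesized use of std::max, so it stays usable in device code and is immune to the min/max macros of <windows.h>; a one-line sketch:

double t = Eigen::numext::maxi(std::abs(-2.0), Eigen::numext::maxi(std::abs(0.5), std::abs(1.0)));  // t == 2.0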
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index dc240e13e..c20ea03e6 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -122,7 +122,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
* according to the specified problem \a size.
* \sa GeneralizedEigenSolver()
*/
- GeneralizedEigenSolver(Index size)
+ explicit GeneralizedEigenSolver(Index size)
: m_eivec(size, size),
m_alphas(size),
m_betas(size),
@@ -145,7 +145,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
*
* \sa compute()
*/
- GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
+ explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
: m_eivec(A.rows(), A.cols()),
m_alphas(A.cols()),
m_betas(A.cols()),
diff --git a/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
index 07bf1ea09..1ce1f5f58 100644
--- a/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
@@ -74,7 +74,7 @@ class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixT
*
* \sa compute() for an example
*/
- GeneralizedSelfAdjointEigenSolver(Index size)
+ explicit GeneralizedSelfAdjointEigenSolver(Index size)
: Base(size)
{}
diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
index 3db0c0106..2615a9f23 100644
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -97,7 +97,7 @@ template<typename _MatrixType> class HessenbergDecomposition
*
* \sa compute() for an example.
*/
- HessenbergDecomposition(Index size = Size==Dynamic ? 2 : Size)
+ explicit HessenbergDecomposition(Index size = Size==Dynamic ? 2 : Size)
: m_matrix(size,size),
m_temp(size),
m_isInitialized(false)
@@ -115,7 +115,7 @@ template<typename _MatrixType> class HessenbergDecomposition
*
* \sa matrixH() for an example.
*/
- HessenbergDecomposition(const MatrixType& matrix)
+ explicit HessenbergDecomposition(const MatrixType& matrix)
: m_matrix(matrix),
m_temp(matrix.rows()),
m_isInitialized(false)
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index 5706eeebe..128ef9028 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -83,7 +83,7 @@ namespace Eigen {
*
* \sa compute() for an example.
*/
- RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) :
+ explicit RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) :
m_S(size, size),
m_T(size, size),
m_Q(size, size),
@@ -101,7 +101,7 @@ namespace Eigen {
*
* This constructor calls compute() to compute the QZ decomposition.
*/
- RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
+ explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
m_S(A.rows(),A.cols()),
m_T(A.rows(),A.cols()),
m_Q(A.rows(),A.cols()),
@@ -313,7 +313,7 @@ namespace Eigen {
using std::abs;
using std::sqrt;
const Index dim=m_S.cols();
- if (abs(m_S.coeff(i+1,i)==Scalar(0)))
+ if (abs(m_S.coeff(i+1,i))==Scalar(0))
return;
Index z = findSmallDiagEntry(i,i+1);
if (z==i-1)
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 64d136341..51e61ba38 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -80,7 +80,7 @@ template<typename _MatrixType> class RealSchur
*
* \sa compute() for an example.
*/
- RealSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)
+ explicit RealSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)
: m_matT(size, size),
m_matU(size, size),
m_workspaceVector(size),
@@ -100,7 +100,7 @@ template<typename _MatrixType> class RealSchur
* Example: \include RealSchur_RealSchur_MatrixType.cpp
* Output: \verbinclude RealSchur_RealSchur_MatrixType.out
*/
- RealSchur(const MatrixType& matrix, bool computeU = true)
+ explicit RealSchur(const MatrixType& matrix, bool computeU = true)
: m_matT(matrix.rows(),matrix.cols()),
m_matU(matrix.rows(),matrix.cols()),
m_workspaceVector(matrix.rows()),
@@ -234,7 +234,7 @@ template<typename _MatrixType> class RealSchur
typedef Matrix<Scalar,3,1> Vector3s;
Scalar computeNormOfT();
- Index findSmallSubdiagEntry(Index iu, const Scalar& norm);
+ Index findSmallSubdiagEntry(Index iu);
void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
@@ -286,7 +286,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
{
while (iu >= 0)
{
- Index il = findSmallSubdiagEntry(iu, norm);
+ Index il = findSmallSubdiagEntry(iu);
// Check for convergence
if (il == iu) // One root found
@@ -343,16 +343,14 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
/** \internal Look for single small sub-diagonal element and returns its index */
template<typename MatrixType>
-inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& norm)
+inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu)
{
using std::abs;
Index res = iu;
while (res > 0)
{
Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res));
- if (s == 0.0)
- s = norm;
- if (abs(m_matT.coeff(res,res-1)) < NumTraits<Scalar>::epsilon() * s)
+ if (abs(m_matT.coeff(res,res-1)) <= NumTraits<Scalar>::epsilon() * s)
break;
res--;
}
@@ -457,9 +455,7 @@ inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const V
const Scalar lhs = m_matT.coeff(im,im-1) * (abs(v.coeff(1)) + abs(v.coeff(2)));
const Scalar rhs = v.coeff(0) * (abs(m_matT.coeff(im-1,im-1)) + abs(Tmm) + abs(m_matT.coeff(im+1,im+1)));
if (abs(lhs) < NumTraits<Scalar>::epsilon() * rhs)
- {
break;
- }
}
}
diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h
index ad9736460..c3089b468 100644
--- a/Eigen/src/Eigenvalues/RealSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h
@@ -44,10 +44,6 @@ template<> inline \
RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
{ \
- typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
- typedef MatrixType::Scalar Scalar; \
- typedef MatrixType::RealScalar RealScalar; \
-\
eigen_assert(matrix.cols() == matrix.rows()); \
\
lapack_int n = matrix.cols(), sdim, info; \
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index a6bbdac6b..54f60b197 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -133,7 +133,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
* \sa compute() for an example
*/
EIGEN_DEVICE_FUNC
- SelfAdjointEigenSolver(Index size)
+ explicit SelfAdjointEigenSolver(Index size)
: m_eivec(size, size),
m_eivalues(size),
m_subdiag(size > 1 ? size - 1 : 1),
@@ -156,7 +156,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
* \sa compute(const MatrixType&, int)
*/
EIGEN_DEVICE_FUNC
- SelfAdjointEigenSolver(const MatrixType& matrix, int options = ComputeEigenvectors)
+ explicit SelfAdjointEigenSolver(const MatrixType& matrix, int options = ComputeEigenvectors)
: m_eivec(matrix.rows(), matrix.cols()),
m_eivalues(matrix.cols()),
m_subdiag(matrix.rows() > 1 ? matrix.rows() - 1 : 1),
@@ -732,7 +732,6 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
EIGEN_DEVICE_FUNC
static inline void run(SolverType& solver, const MatrixType& mat, int options)
{
- EIGEN_USING_STD_MATH(max)
EIGEN_USING_STD_MATH(sqrt);
eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
@@ -746,7 +745,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
// map the matrix coefficients to [-1:1] to avoid over- and underflow.
Scalar scale = mat.cwiseAbs().maxCoeff();
- scale = (max)(scale,Scalar(1));
+ scale = numext::maxi(scale,Scalar(1));
MatrixType scaledMat = mat / scale;
// Compute the eigenvalues
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index 192278d68..bedd1cb34 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -18,8 +18,10 @@ namespace internal {
template<typename MatrixType> struct TridiagonalizationMatrixTReturnType;
template<typename MatrixType>
struct traits<TridiagonalizationMatrixTReturnType<MatrixType> >
+ : public traits<typename MatrixType::PlainObject>
{
- typedef typename MatrixType::PlainObject ReturnType;
+ typedef typename MatrixType::PlainObject ReturnType; // FIXME shall it be a BandMatrix?
+ enum { Flags = 0 };
};
template<typename MatrixType, typename CoeffVectorType>
@@ -89,10 +91,8 @@ template<typename _MatrixType> class Tridiagonalization
>::type DiagonalReturnType;
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
- typename internal::add_const_on_value_type<typename Diagonal<
- Block<const MatrixType,SizeMinusOne,SizeMinusOne> >::RealReturnType>::type,
- const Diagonal<
- Block<const MatrixType,SizeMinusOne,SizeMinusOne> >
+ typename internal::add_const_on_value_type<typename Diagonal<const MatrixType, -1>::RealReturnType>::type,
+ const Diagonal<const MatrixType, -1>
>::type SubDiagonalReturnType;
/** \brief Return type of matrixQ() */
@@ -110,7 +110,7 @@ template<typename _MatrixType> class Tridiagonalization
*
* \sa compute() for an example.
*/
- Tridiagonalization(Index size = Size==Dynamic ? 2 : Size)
+ explicit Tridiagonalization(Index size = Size==Dynamic ? 2 : Size)
: m_matrix(size,size),
m_hCoeffs(size > 1 ? size-1 : 1),
m_isInitialized(false)
@@ -126,7 +126,7 @@ template<typename _MatrixType> class Tridiagonalization
* Example: \include Tridiagonalization_Tridiagonalization_MatrixType.cpp
* Output: \verbinclude Tridiagonalization_Tridiagonalization_MatrixType.out
*/
- Tridiagonalization(const MatrixType& matrix)
+ explicit Tridiagonalization(const MatrixType& matrix)
: m_matrix(matrix),
m_hCoeffs(matrix.cols() > 1 ? matrix.cols()-1 : 1),
m_isInitialized(false)
@@ -305,7 +305,7 @@ typename Tridiagonalization<MatrixType>::DiagonalReturnType
Tridiagonalization<MatrixType>::diagonal() const
{
eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
- return m_matrix.diagonal();
+ return m_matrix.diagonal().real();
}
template<typename MatrixType>
@@ -313,8 +313,7 @@ typename Tridiagonalization<MatrixType>::SubDiagonalReturnType
Tridiagonalization<MatrixType>::subDiagonal() const
{
eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
- Index n = m_matrix.rows();
- return Block<const MatrixType,SizeMinusOne,SizeMinusOne>(m_matrix, 1, 0, n-1,n-1).diagonal();
+ return m_matrix.template diagonal<-1>().real();
}
namespace internal {
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index b6a2f0e24..d6c5c1293 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -71,7 +71,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
template<typename Derived>
inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
{
- typename internal::nested<Derived,2>::type p(a_p.derived());
+ typename internal::nested_eval<Derived,2>::type p(a_p.derived());
m_min = p;
m_max = p;
}
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index 97dd21d15..7f1907542 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -48,8 +48,7 @@ struct traits<Homogeneous<MatrixType,Direction> >
TmpFlags = _MatrixTypeNested::Flags & HereditaryBits,
Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit)
: RowsAtCompileTime==1 ? (TmpFlags | RowMajorBit)
- : TmpFlags,
- CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+ : TmpFlags
};
};
@@ -59,52 +58,54 @@ template<typename MatrixType,typename Rhs> struct homogeneous_right_product_impl
} // end namespace internal
template<typename MatrixType,int _Direction> class Homogeneous
- : internal::no_assignment_operator, public MatrixBase<Homogeneous<MatrixType,_Direction> >
+ : public MatrixBase<Homogeneous<MatrixType,_Direction> >, internal::no_assignment_operator
{
public:
+ typedef MatrixType NestedExpression;
enum { Direction = _Direction };
typedef MatrixBase<Homogeneous> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)
- inline Homogeneous(const MatrixType& matrix)
+ explicit inline Homogeneous(const MatrixType& matrix)
: m_matrix(matrix)
{}
inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
-
- inline Scalar coeff(Index row, Index col) const
- {
- if( (int(Direction)==Vertical && row==m_matrix.rows())
- || (int(Direction)==Horizontal && col==m_matrix.cols()))
- return 1;
- return m_matrix.coeff(row, col);
- }
+
+ const NestedExpression& nestedExpression() const { return m_matrix; }
template<typename Rhs>
- inline const internal::homogeneous_right_product_impl<Homogeneous,Rhs>
+ inline const Product<Homogeneous,Rhs>
operator* (const MatrixBase<Rhs>& rhs) const
{
eigen_assert(int(Direction)==Horizontal);
- return internal::homogeneous_right_product_impl<Homogeneous,Rhs>(m_matrix,rhs.derived());
+ return Product<Homogeneous,Rhs>(*this,rhs.derived());
}
template<typename Lhs> friend
- inline const internal::homogeneous_left_product_impl<Homogeneous,Lhs>
+ inline const Product<Lhs,Homogeneous>
operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
{
eigen_assert(int(Direction)==Vertical);
- return internal::homogeneous_left_product_impl<Homogeneous,Lhs>(lhs.derived(),rhs.m_matrix);
+ return Product<Lhs,Homogeneous>(lhs.derived(),rhs);
}
template<typename Scalar, int Dim, int Mode, int Options> friend
- inline const internal::homogeneous_left_product_impl<Homogeneous,Transform<Scalar,Dim,Mode,Options> >
+ inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
{
eigen_assert(int(Direction)==Vertical);
- return internal::homogeneous_left_product_impl<Homogeneous,Transform<Scalar,Dim,Mode,Options> >(lhs,rhs.m_matrix);
+ return Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous>(lhs,rhs);
+ }
+
+ template<typename Func>
+ EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar)>::type
+ redux(const Func& func) const
+ {
+ return func(m_matrix.redux(func), Scalar(1));
}
protected:
@@ -127,7 +128,7 @@ inline typename MatrixBase<Derived>::HomogeneousReturnType
MatrixBase<Derived>::homogeneous() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
- return derived();
+ return HomogeneousReturnType(derived());
}
/** \geometry_module
@@ -142,7 +143,7 @@ template<typename ExpressionType, int Direction>
inline Homogeneous<ExpressionType,Direction>
VectorwiseOp<ExpressionType,Direction>::homogeneous() const
{
- return _expression();
+ return HomogeneousReturnType(_expression());
}
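The returned Homogeneous expression behaves as before at the user level; the usual idiom still reads:

Eigen::Vector3d p(1.0, 2.0, 3.0);
Eigen::Vector4d h = p.homogeneous();                     // (1, 2, 3, 1)
Eigen::Matrix4d P = Eigen::Matrix4d::Random();
Eigen::Vector3d q = (P * p.homogeneous()).hnormalized(); // project, then dehomogenize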
/** \geometry_module
@@ -300,6 +301,93 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
typename Rhs::Nested m_rhs;
};
+template<typename ArgType,int Direction>
+struct evaluator_traits<Homogeneous<ArgType,Direction> >
+{
+ typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;
+ typedef HomogeneousShape Shape;
+ static const int AssumeAliasing = 0;
+};
+
+template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; };
+
+
+template<typename ArgType,int Direction>
+struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
+ : evaluator<typename Homogeneous<ArgType,Direction>::PlainObject >::type
+{
+ typedef Homogeneous<ArgType,Direction> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator<XprType> type;
+ typedef evaluator<XprType> nestedType;
+
+ explicit unary_evaluator(const XprType& op)
+ : Base(), m_temp(op)
+ {
+ ::new (static_cast<Base*>(this)) Base(m_temp);
+ }
+
+protected:
+ PlainObject m_temp;
+};
+
+// dense = homogeneous (vertical direction: append a row of ones)
+template< typename DstXprType, typename ArgType, typename Scalar>
+struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Homogeneous<ArgType,Vertical> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
+ dst.row(dst.rows()-1).setOnes();
+ }
+};
+
+// dense = homogeneous (horizontal direction: append a column of ones)
+template< typename DstXprType, typename ArgType, typename Scalar>
+struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Homogeneous<ArgType,Horizontal> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
+ dst.col(dst.cols()-1).setOnes();
+ }
+};
+
+template<typename LhsArg, typename Rhs, int ProductTag>
+struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
+ {
+ homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
+ }
+};
+
+template<typename Lhs, typename RhsArg, int ProductTag>
+struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+ {
+ homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
+ }
+};
+
+template<typename Scalar, int Dim, int Mode,int Options, typename RhsArg, int ProductTag>
+struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
+{
+ typedef Transform<Scalar,Dim,Mode,Options> TransformType;
+ template<typename Dest>
+ static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+ {
+ homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
+ }
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h
index aeff43fef..00b7c4300 100644
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -100,7 +100,17 @@ public:
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
Hyperplane result(p0.size());
- result.normal() = (p2 - p0).cross(p1 - p0).normalized();
+ VectorType v0(p2 - p0), v1(p1 - p0);
+ result.normal() = v0.cross(v1);
+ RealScalar norm = result.normal().norm();
+ if(norm <= v0.norm() * v1.norm() * NumTraits<RealScalar>::epsilon())
+ {
+ Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
+ JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
+ result.normal() = svd.matrixV().col(2);
+ }
+ else
+ result.normal() /= norm;
result.offset() = -p0.dot(result.normal());
return result;
}
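The SVD fallback matters for (nearly) collinear inputs, where the cross product collapses towards zero; a sketch of the degenerate case it now handles:

typedef Eigen::Hyperplane<double,3> Plane3;
Eigen::Vector3d p0(0,0,0), p1(1,0,0), p2(2,0,1e-16);  // almost collinear
Plane3 pl = Plane3::Through(p0, p1, p2);              // normal stays unit length
double n = pl.normal().norm();                        // ~1, not 0 or inf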
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 26be3ee5b..a245c79d3 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -30,8 +30,8 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
// Note that there is no need for an expression here since the compiler
// optimizes such a small temporary very well (even within a complex expression)
- typename internal::nested<Derived,2>::type lhs(derived());
- typename internal::nested<OtherDerived,2>::type rhs(other.derived());
+ typename internal::nested_eval<Derived,2>::type lhs(derived());
+ typename internal::nested_eval<OtherDerived,2>::type rhs(other.derived());
return typename cross_product_return_type<OtherDerived>::type(
numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),
numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),
@@ -76,8 +76,8 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)
- typedef typename internal::nested<Derived,2>::type DerivedNested;
- typedef typename internal::nested<OtherDerived,2>::type OtherDerivedNested;
+ typedef typename internal::nested_eval<Derived,2>::type DerivedNested;
+ typedef typename internal::nested_eval<OtherDerived,2>::type OtherDerivedNested;
DerivedNested lhs(derived());
OtherDerivedNested rhs(other.derived());
@@ -103,21 +103,24 @@ VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& ot
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+ typename internal::nested_eval<ExpressionType,2>::type mat(_expression());
+ typename internal::nested_eval<OtherDerived,2>::type vec(other.derived());
CrossReturnType res(_expression().rows(),_expression().cols());
if(Direction==Vertical)
{
eigen_assert(CrossReturnType::RowsAtCompileTime==3 && "the matrix must have exactly 3 rows");
- res.row(0) = (_expression().row(1) * other.coeff(2) - _expression().row(2) * other.coeff(1)).conjugate();
- res.row(1) = (_expression().row(2) * other.coeff(0) - _expression().row(0) * other.coeff(2)).conjugate();
- res.row(2) = (_expression().row(0) * other.coeff(1) - _expression().row(1) * other.coeff(0)).conjugate();
+ res.row(0) = (mat.row(1) * vec.coeff(2) - mat.row(2) * vec.coeff(1)).conjugate();
+ res.row(1) = (mat.row(2) * vec.coeff(0) - mat.row(0) * vec.coeff(2)).conjugate();
+ res.row(2) = (mat.row(0) * vec.coeff(1) - mat.row(1) * vec.coeff(0)).conjugate();
}
else
{
eigen_assert(CrossReturnType::ColsAtCompileTime==3 && "the matrix must have exactly 3 columns");
- res.col(0) = (_expression().col(1) * other.coeff(2) - _expression().col(2) * other.coeff(1)).conjugate();
- res.col(1) = (_expression().col(2) * other.coeff(0) - _expression().col(0) * other.coeff(2)).conjugate();
- res.col(2) = (_expression().col(0) * other.coeff(1) - _expression().col(1) * other.coeff(0)).conjugate();
+ res.col(0) = (mat.col(1) * vec.coeff(2) - mat.col(2) * vec.coeff(1)).conjugate();
+ res.col(1) = (mat.col(2) * vec.coeff(0) - mat.col(0) * vec.coeff(2)).conjugate();
+ res.col(2) = (mat.col(0) * vec.coeff(1) - mat.col(1) * vec.coeff(0)).conjugate();
}
return res;
}
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 11e5398d4..508eba767 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -217,7 +217,7 @@ struct traits<Quaternion<_Scalar,_Options> >
typedef _Scalar Scalar;
typedef Matrix<_Scalar,4,1,_Options> Coefficients;
enum{
- IsAligned = internal::traits<Coefficients>::Flags & AlignedBit,
+ IsAligned = (internal::traits<Coefficients>::EvaluatorFlags & AlignedBit) != 0,
Flags = IsAligned ? (AlignedBit | LvalueBit) : LvalueBit
};
};
@@ -251,7 +251,7 @@ public:
inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
/** Constructs and initialize a quaternion from the array data */
- inline Quaternion(const Scalar* data) : m_coeffs(data) {}
+ explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
/** Copy constructor */
template<class Derived> EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
@@ -351,7 +351,7 @@ class Map<const Quaternion<_Scalar>, _Options >
* \code *coeffs == {x, y, z, w} \endcode
*
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
- EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
+ explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
inline const Coefficients& coeffs() const { return m_coeffs;}
@@ -388,7 +388,7 @@ class Map<Quaternion<_Scalar>, _Options >
* \code *coeffs == {x, y, z, w} \endcode
*
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
- EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
+ explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
inline Coefficients& coeffs() { return m_coeffs; }
inline const Coefficients& coeffs() const { return m_coeffs; }
@@ -571,7 +571,6 @@ template<class Derived>
template<typename Derived1, typename Derived2>
inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
{
- EIGEN_USING_STD_MATH(max);
using std::sqrt;
Vector3 v0 = a.normalized();
Vector3 v1 = b.normalized();
@@ -587,7 +586,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
// which yields a singular value problem
if (c < Scalar(-1)+NumTraits<Scalar>::dummy_precision())
{
- c = (max)(c,Scalar(-1));
+ c = numext::maxi(c,Scalar(-1));
Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
Vector3 axis = svd.matrixV().col(2);
diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h
index 1cac343a5..4feb3d4d2 100644
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -59,7 +59,10 @@ protected:
public:
/** Construct a 2D counter clock wise rotation from the angle \a a in radian. */
- inline Rotation2D(const Scalar& a) : m_angle(a) {}
+ explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
+
+  /** Default constructor without initialization. The represented rotation is undefined. */
+ Rotation2D() {}
/** \returns the rotation angle */
inline Scalar angle() const { return m_angle; }
@@ -68,11 +71,11 @@ public:
inline Scalar& angle() { return m_angle; }
/** \returns the inverse rotation */
- inline Rotation2D inverse() const { return -m_angle; }
+ inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
/** Concatenates two rotations */
inline Rotation2D operator*(const Rotation2D& other) const
- { return m_angle + other.m_angle; }
+ { return Rotation2D(m_angle + other.m_angle); }
/** Concatenates two rotations */
inline Rotation2D& operator*=(const Rotation2D& other)
@@ -81,16 +84,16 @@ public:
/** Applies the rotation to a 2D vector */
Vector2 operator* (const Vector2& vec) const
{ return toRotationMatrix() * vec; }
-
+
template<typename Derived>
Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
- Matrix2 toRotationMatrix(void) const;
+ Matrix2 toRotationMatrix() const;
/** \returns the spherical interpolation between \c *this and \a other using
* parameter \a t. It is in fact equivalent to a linear interpolation.
*/
inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
- { return m_angle * (1-t) + other.angle() * t; }
+ { return Rotation2D(m_angle * (1-t) + other.angle() * t); }
/** \returns \c *this with scalar type casted to \a NewScalarType
*
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index cb93acf6b..d33fc24db 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -62,6 +62,24 @@ struct transform_construct_from_matrix;
template<typename TransformType> struct transform_take_affine_part;
+template<typename _Scalar, int _Dim, int _Mode, int _Options>
+struct traits<Transform<_Scalar,_Dim,_Mode,_Options> >
+{
+ typedef _Scalar Scalar;
+ typedef DenseIndex Index;
+ typedef Dense StorageKind;
+ enum {
+ Dim1 = _Dim==Dynamic ? _Dim : _Dim + 1,
+ RowsAtCompileTime = _Mode==Projective ? Dim1 : _Dim,
+ ColsAtCompileTime = Dim1,
+ MaxRowsAtCompileTime = RowsAtCompileTime,
+ MaxColsAtCompileTime = ColsAtCompileTime,
+ Flags = 0
+ };
+};
+
+template<int Mode> struct transform_make_affine;
+
} // end namespace internal
/** \geometry_module \ingroup Geometry_Module
@@ -230,8 +248,7 @@ public:
inline Transform()
{
check_template_params();
- if (int(Mode)==Affine)
- makeAffine();
+ internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
}
inline Transform(const Transform& other)
@@ -355,6 +372,9 @@ public:
inline Transform& operator=(const QTransform& other);
inline QTransform toQTransform(void) const;
#endif
+
+ Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
+ Index cols() const { return m_matrix.cols(); }
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operator(Index,Index) const */
@@ -454,7 +474,7 @@ public:
return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
}
- #ifdef __INTEL_COMPILER
+ #if EIGEN_COMP_ICC
private:
// this intermediate structure permits to workaround a bug in ICC 11:
// error: template instantiation resulted in unexpected function type of "Eigen::Transform<double, 3, 32, 0>
@@ -591,11 +611,7 @@ public:
*/
void makeAffine()
{
- if(int(Mode)!=int(AffineCompact))
- {
- matrix().template block<1,Dim>(Dim,0).setZero();
- matrix().coeffRef(Dim,Dim) = Scalar(1);
- }
+ internal::transform_make_affine<int(Mode)>::run(m_matrix);
}
/** \internal
@@ -1083,6 +1099,24 @@ Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBas
namespace internal {
+template<int Mode>
+struct transform_make_affine
+{
+ template<typename MatrixType>
+ static void run(MatrixType &mat)
+ {
+ static const int Dim = MatrixType::ColsAtCompileTime-1;
+ mat.template block<1,Dim>(Dim,0).setZero();
+ mat.coeffRef(Dim,Dim) = typename MatrixType::Scalar(1);
+ }
+};
+
+template<>
+struct transform_make_affine<AffineCompact>
+{
+ template<typename MatrixType> static void run(MatrixType &) { }
+};
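With this helper, a default-constructed affine transform gets a valid last row while the compact variant is left untouched; for example (the rest of the matrix remains uninitialized):

Eigen::Transform<double,3,Eigen::Affine> T;         // last row set to (0 0 0 1)
Eigen::Transform<double,3,Eigen::AffineCompact> C;  // no last row to initialize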
+
// selector needed to avoid taking the inverse of a 3x4 matrix
template<typename TransformType, int Mode=TransformType::Mode>
struct projective_transform_inverse
diff --git a/Eigen/src/Householder/BlockHouseholder.h b/Eigen/src/Householder/BlockHouseholder.h
index 60dbea5f5..35dbf80a1 100644
--- a/Eigen/src/Householder/BlockHouseholder.h
+++ b/Eigen/src/Householder/BlockHouseholder.h
@@ -16,48 +16,85 @@
namespace Eigen {
namespace internal {
+
+/** \internal */
+// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
+// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
+// {
+// typedef typename TriangularFactorType::Index Index;
+// typedef typename VectorsType::Scalar Scalar;
+// const Index nbVecs = vectors.cols();
+// eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
+//
+// for(Index i = 0; i < nbVecs; i++)
+// {
+// Index rs = vectors.rows() - i;
+// // Warning, note that hCoeffs may alias with vectors.
+// // It is then necessary to copy it before modifying vectors(i,i).
+// typename CoeffsType::Scalar h = hCoeffs(i);
+//     // This hack permits passing through nested Block<> and Transpose<> expressions.
+// Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);
+// Scalar Vii = *Vii_ptr;
+// *Vii_ptr = Scalar(1);
+// triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()
+// * vectors.col(i).tail(rs);
+// *Vii_ptr = Vii;
+// // FIXME add .noalias() once the triangular product can work inplace
+// triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
+// * triFactor.col(i).head(i);
+// triFactor(i,i) = hCoeffs(i);
+// }
+// }
/** \internal */
+// This variant avoids modifying the input vectors
template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
{
typedef typename TriangularFactorType::Index Index;
- typedef typename VectorsType::Scalar Scalar;
const Index nbVecs = vectors.cols();
eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
- for(Index i = 0; i < nbVecs; i++)
+ for(Index i = nbVecs-1; i >=0 ; --i)
{
- Index rs = vectors.rows() - i;
- Scalar Vii = vectors(i,i);
- vectors.const_cast_derived().coeffRef(i,i) = Scalar(1);
- triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint()
- * vectors.col(i).tail(rs);
- vectors.const_cast_derived().coeffRef(i, i) = Vii;
- // FIXME add .noalias() once the triangular product can work inplace
- triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
- * triFactor.col(i).head(i);
+ Index rs = vectors.rows() - i - 1;
+ Index rt = nbVecs-i-1;
+
+ if(rt>0)
+ {
+ triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
+ * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
+
+ // FIXME add .noalias() once the triangular product can work inplace
+ triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
+
+ }
triFactor(i,i) = hCoeffs(i);
}
}
-/** \internal */
+/** \internal
+ * if forward then perform mat = H0 * H1 * H2 * mat
+ * otherwise perform mat = H2 * H1 * H0 * mat
+ */
template<typename MatrixType,typename VectorsType,typename CoeffsType>
-void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs)
+void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)
{
typedef typename MatrixType::Index Index;
enum { TFactorSize = MatrixType::ColsAtCompileTime };
Index nbVecs = vectors.cols();
- Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs);
- make_block_householder_triangular_factor(T, vectors, hCoeffs);
-
- const TriangularView<const VectorsType, UnitLower>& V(vectors);
+ Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);
+
+ if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);
+ else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());
+ const TriangularView<const VectorsType, UnitLower> V(vectors);
// A -= V T V^* A
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0,
VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
// FIXME add .noalias() once the triangular product can work inplace
- tmp = T.template triangularView<Upper>().adjoint() * tmp;
+ if(forward) tmp = T.template triangularView<Upper>() * tmp;
+ else tmp = T.template triangularView<Upper>().adjoint() * tmp;
mat.noalias() -= V * tmp;
}
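
For readers not steeped in the LAPACK literature: the routine above builds the triangular factor T of the compact WY representation, in which a product of Householder reflectors is written as H0 H1 ... Hk-1 = I - V T V^*, with V unit lower trapezoidal holding the essential vectors and T a small k-by-k upper triangular matrix. That is what lets apply_block_householder_on_the_left replace k rank-1 updates with a pair of matrix-matrix products. A self-contained sketch against Eigen's public dense API only (the storage convention and the Schreiber-Van Loan recurrence are the assumptions here), verifying the identity on a small case:

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  const int n = 6, k = 3;
  // Unit-lower storage of k Householder vectors, as Eigen stores them:
  // implicit ones on the diagonal, essential parts strictly below.
  MatrixXd V = MatrixXd::Random(n, k);
  V.triangularView<StrictlyUpper>().setZero();
  for (int j = 0; j < k; ++j) V(j, j) = 1.0;
  VectorXd h = VectorXd::Random(k);            // plays the role of hCoeffs

  // Reference: apply the reflectors one by one, H0 * H1 * H2.
  MatrixXd Q = MatrixXd::Identity(n, n);
  for (int j = 0; j < k; ++j)
    Q = Q * (MatrixXd::Identity(n, n) - h(j) * V.col(j) * V.col(j).adjoint());

  // Triangular factor T with H0 H1 H2 = I - V T V^*, built by the
  // Schreiber-Van Loan recurrence (the quantity the code above computes).
  MatrixXd T = MatrixXd::Zero(k, k);
  for (int j = 0; j < k; ++j)
  {
    if (j > 0)
      T.col(j).head(j) = -h(j) * (T.topLeftCorner(j, j)
                          * (V.leftCols(j).adjoint() * V.col(j)));
    T(j, j) = h(j);
  }

  std::cout << (Q - (MatrixXd::Identity(n, n) - V * T * V.adjoint())).norm()
            << std::endl;                      // ~1e-15
}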
diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h
index d800ca1fa..4ded2995f 100644
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -73,6 +73,15 @@ struct traits<HouseholderSequence<VectorsType,CoeffsType,Side> >
};
};
+struct HouseholderSequenceShape {};
+
+template<typename VectorsType, typename CoeffsType, int Side>
+struct evaluator_traits<HouseholderSequence<VectorsType,CoeffsType,Side> >
+ : public evaluator_traits_base<HouseholderSequence<VectorsType,CoeffsType,Side> >
+{
+ typedef HouseholderSequenceShape Shape;
+};
+
template<typename VectorsType, typename CoeffsType, int Side>
struct hseq_side_dependent_impl
{
@@ -307,12 +316,36 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
template<typename Dest, typename Workspace>
inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
{
- workspace.resize(dst.cols());
- for(Index k = 0; k < m_length; ++k)
+ const Index BlockSize = 48;
+ // if there are enough reflectors and more than one column, apply the reflectors by blocks
+ if(m_length>=BlockSize && dst.cols()>1)
{
- Index actual_k = m_trans ? k : m_length-k-1;
- dst.bottomRows(rows()-m_shift-actual_k)
- .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
+ for(Index i = 0; i < m_length; i+=BlockSize)
+ {
+ Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i;
+ Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize);
+ Index bs = end-k;
+ Index start = k + m_shift;
+
+ typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;
+ SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,
+ Side==OnTheRight ? start : k,
+ Side==OnTheRight ? bs : m_vectors.rows()-start,
+ Side==OnTheRight ? m_vectors.cols()-start : bs);
+ typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);
+ Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols());
+ apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans);
+ }
+ }
+ else
+ {
+ workspace.resize(dst.cols());
+ for(Index k = 0; k < m_length; ++k)
+ {
+ Index actual_k = m_trans ? k : m_length-k-1;
+ dst.bottomRows(rows()-m_shift-actual_k)
+ .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
+ }
}
}
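
From the public API the new blocked path is transparent; per the branch above it only kicks in when a sequence holds at least 48 reflectors and is applied to more than one column. A usage sketch (the sizes are arbitrary, chosen so the blocked branch is taken):

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd A = MatrixXd::Random(200, 200);
  MatrixXd B = MatrixXd::Random(200, 16);

  HouseholderQR<MatrixXd> qr(A);                   // 200 reflectors, so m_length >= 48
  MatrixXd QtB = qr.householderQ().adjoint() * B;  // routed through applyThisOnTheLeft
  // With B.cols() == 1, or with fewer than 48 reflectors, the original
  // reflector-by-reflector loop is taken instead.
  return QtB.allFinite() ? 0 : 1;
}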
diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
index 1f3c060d0..3991afa8f 100644
--- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
+++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -43,7 +43,7 @@ class DiagonalPreconditioner
DiagonalPreconditioner() : m_isInitialized(false) {}
template<typename MatType>
- DiagonalPreconditioner(const MatType& mat) : m_invdiag(mat.cols())
+ explicit DiagonalPreconditioner(const MatType& mat) : m_invdiag(mat.cols())
{
compute(mat);
}
@@ -80,19 +80,20 @@ class DiagonalPreconditioner
return factorize(mat);
}
+ /** \internal */
template<typename Rhs, typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, Dest& x) const
{
x = m_invdiag.array() * b.array();
}
- template<typename Rhs> inline const internal::solve_retval<DiagonalPreconditioner, Rhs>
+ template<typename Rhs> inline const Solve<DiagonalPreconditioner, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "DiagonalPreconditioner is not initialized.");
eigen_assert(m_invdiag.size()==b.rows()
&& "DiagonalPreconditioner::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<DiagonalPreconditioner, Rhs>(*this, b.derived());
+ return Solve<DiagonalPreconditioner, Rhs>(*this, b.derived());
}
protected:
@@ -100,22 +101,6 @@ class DiagonalPreconditioner
bool m_isInitialized;
};
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<DiagonalPreconditioner<_MatrixType>, Rhs>
- : solve_retval_base<DiagonalPreconditioner<_MatrixType>, Rhs>
-{
- typedef DiagonalPreconditioner<_MatrixType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-}
/** \ingroup IterativeLinearSolvers_Module
* \brief A naive preconditioner which approximates any matrix as the identity matrix
@@ -129,7 +114,7 @@ class IdentityPreconditioner
IdentityPreconditioner() {}
template<typename MatrixType>
- IdentityPreconditioner(const MatrixType& ) {}
+ explicit IdentityPreconditioner(const MatrixType& ) {}
template<typename MatrixType>
IdentityPreconditioner& analyzePattern(const MatrixType& ) { return *this; }
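
A minimal usage sketch of the refactored preconditioner API, assuming the new Solve<> expression path is wired up as in the rest of this patch: the constructor is now explicit, and solve() returns a lazy expression evaluated on assignment.

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;

int main()
{
  SparseMatrix<double> A(3, 3);
  A.insert(0, 0) = 4.0; A.insert(1, 1) = 5.0; A.insert(2, 2) = 6.0;
  A.makeCompressed();

  DiagonalPreconditioner<double> P(A);   // explicit: no implicit conversion from A
  VectorXd b = VectorXd::Ones(3);
  VectorXd x = P.solve(b);               // Solve<> expression, evaluated here
  // x == (0.25, 0.2, 1/6): component-wise division by the diagonal
  return 0;
}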
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index 27824b9d5..224fe913f 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -143,7 +143,7 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
* step execution example starting with a random guess and printing the evolution
* of the estimated error:
* \include BiCGSTAB_step_by_step.cpp
- * Note that such a step by step excution is slightly slower.
+ * Note that such a step by step execution is slightly slower.
*
* \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
*/
@@ -178,29 +178,13 @@ public:
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
- BiCGSTAB(const MatrixType& A) : Base(A) {}
+ explicit BiCGSTAB(const MatrixType& A) : Base(A) {}
~BiCGSTAB() {}
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
- * \a x0 as an initial solution.
- *
- * \sa compute()
- */
- template<typename Rhs,typename Guess>
- inline const internal::solve_retval_with_guess<BiCGSTAB, Rhs, Guess>
- solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
- {
- eigen_assert(m_isInitialized && "BiCGSTAB is not initialized.");
- eigen_assert(Base::rows()==b.rows()
- && "BiCGSTAB::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval_with_guess
- <BiCGSTAB, Rhs, Guess>(*this, b.derived(), x0);
- }
-
+
/** \internal */
template<typename Rhs,typename Dest>
- void _solveWithGuess(const Rhs& b, Dest& x) const
+ void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
bool failed = false;
for(int j=0; j<b.cols(); ++j)
@@ -219,36 +203,19 @@ public:
}
/** \internal */
+ using Base::_solve_impl;
template<typename Rhs,typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const
{
-// x.setZero();
- x = b;
- _solveWithGuess(b,x);
+ // x.setZero();
+ x = b;
+ _solve_with_guess_impl(b,x);
}
protected:
};
-
-namespace internal {
-
- template<typename _MatrixType, typename _Preconditioner, typename Rhs>
-struct solve_retval<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs>
- : solve_retval_base<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs>
-{
- typedef BiCGSTAB<_MatrixType, _Preconditioner> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_BICGSTAB_H
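
With solveWithGuess() hoisted into the base class, BiCGSTAB itself keeps only the _solve_* kernels; note also that plain solve() now seeds the iteration with b itself, as _solve_impl shows. A usage sketch (the test matrix is an assumption, chosen to be trivially solvable):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;

int main()
{
  const int n = 100;
  SparseMatrix<double> A(n, n);
  for (int i = 0; i < n; ++i)     A.insert(i, i) = 4.0;       // diagonally dominant
  for (int i = 0; i + 1 < n; ++i) A.insert(i, i + 1) = -1.0;  // upper bidiagonal
  A.makeCompressed();
  VectorXd b = VectorXd::Random(n);

  BiCGSTAB<SparseMatrix<double> > solver(A);  // constructor is now explicit
  VectorXd x0 = VectorXd::Zero(n);
  VectorXd x = solver.solveWithGuess(b, x0);  // inherited from IterativeSolverBase
  return solver.info() == Success ? 0 : 1;
}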
diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
index 3ce517940..b5ef6d60f 100644
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -189,29 +189,13 @@ public:
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
- ConjugateGradient(const MatrixType& A) : Base(A) {}
+ explicit ConjugateGradient(const MatrixType& A) : Base(A) {}
~ConjugateGradient() {}
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
- * \a x0 as an initial solution.
- *
- * \sa compute()
- */
- template<typename Rhs,typename Guess>
- inline const internal::solve_retval_with_guess<ConjugateGradient, Rhs, Guess>
- solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
- {
- eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
- eigen_assert(Base::rows()==b.rows()
- && "ConjugateGradient::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval_with_guess
- <ConjugateGradient, Rhs, Guess>(*this, b.derived(), x0);
- }
/** \internal */
template<typename Rhs,typename Dest>
- void _solveWithGuess(const Rhs& b, Dest& x) const
+ void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
@@ -231,35 +215,18 @@ public:
}
/** \internal */
+ using Base::_solve_impl;
template<typename Rhs,typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const
{
x.setOnes();
- _solveWithGuess(b,x);
+ _solve_with_guess_impl(b.derived(),x);
}
protected:
};
-
-namespace internal {
-
-template<typename _MatrixType, int _UpLo, typename _Preconditioner, typename Rhs>
-struct solve_retval<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs>
- : solve_retval_base<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs>
-{
- typedef ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_CONJUGATE_GRADIENT_H
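
The same refactoring applied to ConjugateGradient; note that its plain solve() starts from the all-ones vector, per _solve_impl above. A minimal sketch with an SPD matrix built for the purpose:

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;

int main()
{
  const int n = 50;
  SparseMatrix<double> A(n, n);
  for (int i = 0; i < n; ++i) A.insert(i, i) = 2.0 + i;   // SPD by construction
  A.makeCompressed();
  VectorXd b = VectorXd::Ones(n);

  ConjugateGradient<SparseMatrix<double> > cg;
  cg.setTolerance(1e-10);
  cg.setMaxIterations(2 * n);
  cg.compute(A);
  VectorXd x = cg.solve(b);
  return cg.info() == Success ? 0 : 1;
}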
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
index b55afc136..8ed9bdecc 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -93,8 +94,12 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
* http://comments.gmane.org/gmane.comp.lib.eigen/3302
*/
template <typename _Scalar>
-class IncompleteLUT : internal::noncopyable
+class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar> >
{
+ protected:
+ typedef SparseSolverBase<IncompleteLUT<_Scalar> > Base;
+ using Base::m_isInitialized;
+ public:
typedef _Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar,Dynamic,1> Vector;
@@ -107,13 +112,13 @@ class IncompleteLUT : internal::noncopyable
IncompleteLUT()
: m_droptol(NumTraits<Scalar>::dummy_precision()), m_fillfactor(10),
- m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false)
+ m_analysisIsOk(false), m_factorizationIsOk(false)
{}
template<typename MatrixType>
- IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
+ explicit IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
: m_droptol(droptol),m_fillfactor(fillfactor),
- m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
+ m_analysisIsOk(false),m_factorizationIsOk(false)
{
eigen_assert(fillfactor != 0);
compute(mat);
@@ -158,7 +163,7 @@ class IncompleteLUT : internal::noncopyable
void setFillfactor(int fillfactor);
template<typename Rhs, typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, Dest& x) const
{
x = m_Pinv * b;
x = m_lu.template triangularView<UnitLower>().solve(x);
@@ -166,15 +171,6 @@ class IncompleteLUT : internal::noncopyable
x = m_P * x;
}
- template<typename Rhs> inline const internal::solve_retval<IncompleteLUT, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "IncompleteLUT is not initialized.");
- eigen_assert(cols()==b.rows()
- && "IncompleteLUT::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<IncompleteLUT, Rhs>(*this, b.derived());
- }
-
protected:
/** keeps off-diagonal entries; drops diagonal entries */
@@ -192,7 +188,6 @@ protected:
int m_fillfactor;
bool m_analysisIsOk;
bool m_factorizationIsOk;
- bool m_isInitialized;
ComputationInfo m_info;
PermutationMatrix<Dynamic,Dynamic,Index> m_P; // Fill-reducing permutation
PermutationMatrix<Dynamic,Dynamic,Index> m_Pinv; // Inverse permutation
@@ -445,23 +440,6 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
m_info = Success;
}
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<IncompleteLUT<_MatrixType>, Rhs>
- : solve_retval_base<IncompleteLUT<_MatrixType>, Rhs>
-{
- typedef IncompleteLUT<_MatrixType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_INCOMPLETE_LUT_H
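
IncompleteLUT now inherits its solve() entry point from SparseSolverBase instead of carrying its own solve_retval machinery. Its typical role is as a preconditioner; a sketch (matrix and tolerances are illustrative):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;

int main()
{
  const int n = 100;
  SparseMatrix<double> A(n, n);
  for (int i = 0; i < n; ++i)     A.insert(i, i) = 10.0;
  for (int i = 0; i + 1 < n; ++i) A.insert(i + 1, i) = -1.0;
  A.makeCompressed();
  VectorXd b = VectorXd::Random(n);

  BiCGSTAB<SparseMatrix<double>, IncompleteLUT<double> > solver;
  solver.preconditioner().setDroptol(1e-5);  // tune before compute()
  solver.compute(A);
  VectorXd x = solver.solve(b);
  return solver.info() == Success ? 0 : 1;
}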
diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
index 2036922d6..f33c868bb 100644
--- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -18,8 +18,12 @@ namespace Eigen {
* \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
*/
template< typename Derived>
-class IterativeSolverBase : internal::noncopyable
+class IterativeSolverBase : public SparseSolverBase<Derived>
{
+protected:
+ typedef SparseSolverBase<Derived> Base;
+ using Base::m_isInitialized;
+
public:
typedef typename internal::traits<Derived>::MatrixType MatrixType;
typedef typename internal::traits<Derived>::Preconditioner Preconditioner;
@@ -29,8 +33,7 @@ public:
public:
- Derived& derived() { return *static_cast<Derived*>(this); }
- const Derived& derived() const { return *static_cast<const Derived*>(this); }
+ using Base::derived;
/** Default constructor. */
IterativeSolverBase()
@@ -49,7 +52,7 @@ public:
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
- IterativeSolverBase(const MatrixType& A)
+ explicit IterativeSolverBase(const MatrixType& A)
{
init();
compute(A);
@@ -57,10 +60,10 @@ public:
~IterativeSolverBase() {}
- /** Initializes the iterative solver for the sparcity pattern of the matrix \a A for further solving \c Ax=b problems.
+ /** Initializes the iterative solver for the sparsity pattern of the matrix \a A for further solving \c Ax=b problems.
*
- * Currently, this function mostly call analyzePattern on the preconditioner. In the future
- * we might, for instance, implement column reodering for faster matrix vector products.
+ * Currently, this function mostly calls analyzePattern on the preconditioner. In the future
+ * we might, for instance, implement column reordering for faster matrix vector products.
*/
Derived& analyzePattern(const MatrixType& A)
{
@@ -73,7 +76,7 @@ public:
/** Initializes the iterative solver with the numerical values of the matrix \a A for further solving \c Ax=b problems.
*
- * Currently, this function mostly call factorize on the preconditioner.
+ * Currently, this function mostly calls factorize on the preconditioner.
*
* \warning this class stores a reference to the matrix A as well as some
* precomputed values that depend on it. Therefore, if \a A is changed
@@ -92,8 +95,8 @@ public:
/** Initializes the iterative solver with the matrix \a A for further solving \c Ax=b problems.
*
- * Currently, this function mostly initialized/compute the preconditioner. In the future
- * we might, for instance, implement column reodering for faster matrix vector products.
+ * Currently, this function mostly initializes/computes the preconditioner. In the future
+ * we might, for instance, implement column reordering for faster matrix vector products.
*
* \warning this class stores a reference to the matrix A as well as some
* precomputed values that depend on it. Therefore, if \a A is changed
@@ -159,31 +162,18 @@ public:
return m_error;
}
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs> inline const internal::solve_retval<Derived, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
- eigen_assert(rows()==b.rows()
- && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<Derived, Rhs>(derived(), b.derived());
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+ /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
+ * and \a x0 as an initial solution.
*
- * \sa compute()
+ * \sa solve(), compute()
*/
- template<typename Rhs>
- inline const internal::sparse_solve_retval<IterativeSolverBase, Rhs>
- solve(const SparseMatrixBase<Rhs>& b) const
+ template<typename Rhs,typename Guess>
+ inline const SolveWithGuess<Derived, Rhs, Guess>
+ solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
{
- eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
- eigen_assert(rows()==b.rows()
- && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<IterativeSolverBase, Rhs>(*this, b.derived());
+ eigen_assert(m_isInitialized && "Solver is not initialized.");
+ eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+ return SolveWithGuess<Derived, Rhs, Guess>(derived(), b.derived(), x0);
}
/** \returns Success if the iterations converged, and NoConvergence otherwise. */
@@ -195,7 +185,7 @@ public:
/** \internal */
template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+ void _solve_impl(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
{
eigen_assert(rows()==b.rows());
@@ -229,26 +219,9 @@ protected:
mutable RealScalar m_error;
mutable int m_iterations;
mutable ComputationInfo m_info;
- mutable bool m_isInitialized, m_analysisIsOk, m_factorizationIsOk;
-};
-
-namespace internal {
-
-template<typename Derived, typename Rhs>
-struct sparse_solve_retval<IterativeSolverBase<Derived>, Rhs>
- : sparse_solve_retval_base<IterativeSolverBase<Derived>, Rhs>
-{
- typedef IterativeSolverBase<Derived> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec().derived()._solve_sparse(rhs(),dst);
- }
+ mutable bool m_analysisIsOk, m_factorizationIsOk;
};
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_ITERATIVE_SOLVER_BASE_H
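
Both entry points of the reworked base class are lazy. A sketch of what an assignment expands to, assuming a solver, right-hand side b, and guess x0 as in the earlier examples (the expansion follows the SolveWithGuess assignment path added below):

// assuming 'solver', 'b', and 'x0' as in the earlier sketches:
VectorXd x = solver.solveWithGuess(b, x0);
// per the assignment path, this amounts to:
//   x = x0;
//   solver._solve_with_guess_impl(b, x);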
diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
new file mode 100644
index 000000000..251c6fa1a
--- /dev/null
+++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
@@ -0,0 +1,113 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVEWITHGUESS_H
+#define EIGEN_SOLVEWITHGUESS_H
+
+namespace Eigen {
+
+template<typename Decomposition, typename RhsType, typename GuessType> class SolveWithGuess;
+
+/** \class SolveWithGuess
+ * \ingroup IterativeLinearSolvers_Module
+ *
+ * \brief Pseudo expression representing a solving operation
+ *
+ * \tparam Decomposition the type of the matrix or decomposition object
+ * \tparam RhsType the type of the right-hand side
+ * \tparam GuessType the type of the initial guess
+ *
+ * This class represents an expression of A.solve(B)
+ * and most of the time this is the only way it is used.
+ *
+ */
+namespace internal {
+
+
+template<typename Decomposition, typename RhsType, typename GuessType>
+struct traits<SolveWithGuess<Decomposition, RhsType, GuessType> >
+ : traits<Solve<Decomposition,RhsType> >
+{};
+
+}
+
+
+template<typename Decomposition, typename RhsType, typename GuessType>
+class SolveWithGuess : public internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type
+{
+public:
+ typedef typename RhsType::Index Index;
+ typedef typename internal::traits<SolveWithGuess>::Scalar Scalar;
+ typedef typename internal::traits<SolveWithGuess>::PlainObject PlainObject;
+ typedef typename internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type Base;
+
+ SolveWithGuess(const Decomposition &dec, const RhsType &rhs, const GuessType &guess)
+ : m_dec(dec), m_rhs(rhs), m_guess(guess)
+ {}
+
+ EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
+
+ EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
+ EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; }
+ EIGEN_DEVICE_FUNC const GuessType& guess() const { return m_guess; }
+
+protected:
+ const Decomposition &m_dec;
+ const RhsType &m_rhs;
+ const GuessType &m_guess;
+
+private:
+ Scalar coeff(Index row, Index col) const;
+ Scalar coeff(Index i) const;
+};
+
+namespace internal {
+
+// Evaluator of SolveWithGuess -> eval into a temporary
+template<typename Decomposition, typename RhsType, typename GuessType>
+struct evaluator<SolveWithGuess<Decomposition,RhsType, GuessType> >
+ : public evaluator<typename SolveWithGuess<Decomposition,RhsType,GuessType>::PlainObject>::type
+{
+ typedef SolveWithGuess<Decomposition,RhsType,GuessType> SolveType;
+ typedef typename SolveType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ evaluator(const SolveType& solve)
+ : m_result(solve.rows(), solve.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ m_result = solve.guess();
+ solve.dec()._solve_with_guess_impl(solve.rhs(), m_result);
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+// Specialization for "dst = dec.solveWithGuess(rhs)"
+// NOTE: we need to specialize it for Dense2Dense to avoid an ambiguous specialization error; a Sparse2Sparse specialization must exist somewhere.
+template<typename DstXprType, typename DecType, typename RhsType, typename GuessType, typename Scalar>
+struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef SolveWithGuess<DecType,RhsType,GuessType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ // FIXME shall we resize dst here?
+ dst = src.guess();
+ src.dec()._solve_with_guess_impl(src.rhs(), dst/*, src.guess()*/);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVEWITHGUESS_H
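
One practical consequence of the guess-aware expression: warm starting. A hedged sketch, reusing the solver and b from the previous examples:

// assuming 'solver' and 'b' as before; reuse each solution as the next guess:
VectorXd x = VectorXd::Zero(b.size());
for (int sweep = 0; sweep < 3; ++sweep)
{
  x = solver.solveWithGuess(b, x);
  if (solver.info() == Success) break;
}
// solver.error() and solver.iterations() report the stats of the last run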
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index bb8e78a8a..d6a3c1e5a 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -92,7 +92,7 @@ template<typename Derived>
inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
{
eigen_assert(rows() == cols());
- typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
+ typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested;
return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
}
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 971b9da1d..96f2cebee 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -12,6 +12,15 @@
namespace Eigen {
+namespace internal {
+template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+ enum { Flags = 0 };
+};
+
+} // end namespace internal
+
/** \ingroup LU_Module
*
* \class FullPivLU
@@ -62,6 +71,7 @@ template<typename _MatrixType> class FullPivLU
typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType;
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;
+ typedef typename MatrixType::PlainObject PlainObject;
/**
* \brief Default Constructor.
@@ -84,7 +94,7 @@ template<typename _MatrixType> class FullPivLU
* \param matrix the matrix of which to compute the LU decomposition.
* It is required to be nonzero.
*/
- FullPivLU(const MatrixType& matrix);
+ explicit FullPivLU(const MatrixType& matrix);
/** Computes the LU decomposition of the given matrix.
*
@@ -211,11 +221,11 @@ template<typename _MatrixType> class FullPivLU
* \sa TriangularView::solve(), kernel(), inverse()
*/
template<typename Rhs>
- inline const internal::solve_retval<FullPivLU, Rhs>
+ inline const Solve<FullPivLU, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "LU is not initialized.");
- return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived());
+ return Solve<FullPivLU, Rhs>(*this, b.derived());
}
/** \returns the determinant of the matrix of which
@@ -360,18 +370,23 @@ template<typename _MatrixType> class FullPivLU
*
* \sa MatrixBase::inverse()
*/
- inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const
+ inline const Inverse<FullPivLU> inverse() const
{
eigen_assert(m_isInitialized && "LU is not initialized.");
eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!");
- return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType>
- (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+ return Inverse<FullPivLU>(*this);
}
MatrixType reconstructedMatrix() const;
inline Index rows() const { return m_lu.rows(); }
inline Index cols() const { return m_lu.cols(); }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
MatrixType m_lu;
@@ -663,64 +678,72 @@ struct image_retval<FullPivLU<_MatrixType> >
/***** Implementation of solve() *****************************************************/
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<FullPivLU<_MatrixType>, Rhs>
- : solve_retval_base<FullPivLU<_MatrixType>, Rhs>
-{
- EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs)
+} // end namespace internal
- template<typename Dest> void evalTo(Dest& dst) const
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+ /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
+ * So we proceed as follows:
+ * Step 1: compute c = P * rhs.
+ * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
+ * Step 3: replace c by the solution x to Ux = c. May or may not exist.
+ * Step 4: result = Q * c;
+ */
+
+ const Index rows = this->rows(),
+ cols = this->cols(),
+ nonzero_pivots = this->nonzeroPivots();
+ eigen_assert(rhs.rows() == rows);
+ const Index smalldim = (std::min)(rows, cols);
+
+ if(nonzero_pivots == 0)
{
- /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
- * So we proceed as follows:
- * Step 1: compute c = P * rhs.
- * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
- * Step 3: replace c by the solution x to Ux = c. May or may not exist.
- * Step 4: result = Q * c;
- */
-
- const Index rows = dec().rows(), cols = dec().cols(),
- nonzero_pivots = dec().nonzeroPivots();
- eigen_assert(rhs().rows() == rows);
- const Index smalldim = (std::min)(rows, cols);
-
- if(nonzero_pivots == 0)
- {
- dst.setZero();
- return;
- }
+ dst.setZero();
+ return;
+ }
- typename Rhs::PlainObject c(rhs().rows(), rhs().cols());
+ typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
- // Step 1
- c = dec().permutationP() * rhs();
+ // Step 1
+ c = permutationP() * rhs;
- // Step 2
- dec().matrixLU()
- .topLeftCorner(smalldim,smalldim)
- .template triangularView<UnitLower>()
- .solveInPlace(c.topRows(smalldim));
- if(rows>cols)
- {
- c.bottomRows(rows-cols)
- -= dec().matrixLU().bottomRows(rows-cols)
- * c.topRows(cols);
- }
+ // Step 2
+ m_lu.topLeftCorner(smalldim,smalldim)
+ .template triangularView<UnitLower>()
+ .solveInPlace(c.topRows(smalldim));
+ if(rows>cols)
+ c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols);
+
+ // Step 3
+ m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+ .template triangularView<Upper>()
+ .solveInPlace(c.topRows(nonzero_pivots));
+
+ // Step 4
+ for(Index i = 0; i < nonzero_pivots; ++i)
+ dst.row(permutationQ().indices().coeff(i)) = c.row(i);
+ for(Index i = nonzero_pivots; i < m_lu.cols(); ++i)
+ dst.row(permutationQ().indices().coeff(i)).setZero();
+}
+#endif
+
+namespace internal {
- // Step 3
- dec().matrixLU()
- .topLeftCorner(nonzero_pivots, nonzero_pivots)
- .template triangularView<Upper>()
- .solveInPlace(c.topRows(nonzero_pivots));
-
- // Step 4
- for(Index i = 0; i < nonzero_pivots; ++i)
- dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i);
- for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i)
- dst.row(dec().permutationQ().indices().coeff(i)).setZero();
+
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef FullPivLU<MatrixType> LuType;
+ typedef Inverse<LuType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};
-
} // end namespace internal
/******* MatrixBase methods *****************************************************************/
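
The four steps of _solve_impl can be replayed through the public API; a sketch for the square, invertible case (so rank handling and the Q-scatter of Step 4 reduce to a plain permutation):

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  const int n = 5;
  MatrixXd A = MatrixXd::Random(n, n);
  VectorXd b = VectorXd::Random(n);

  FullPivLU<MatrixXd> lu(A);

  VectorXd c = lu.permutationP() * b;                          // Step 1: c = P b
  lu.matrixLU().triangularView<UnitLower>().solveInPlace(c);   // Step 2: L c = c
  lu.matrixLU().triangularView<Upper>().solveInPlace(c);       // Step 3: U c = c
  VectorXd x = lu.permutationQ() * c;                          // Step 4: x = Q c

  return ((A * x - b).norm() < 1e-10 * b.norm()) ? 0 : 1;
}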
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/InverseImpl.h
index 8d1364e0a..e5f270d19 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -2,13 +2,14 @@
// for linear algebra.
//
// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_INVERSE_H
-#define EIGEN_INVERSE_H
+#ifndef EIGEN_INVERSE_IMPL_H
+#define EIGEN_INVERSE_IMPL_H
namespace Eigen {
@@ -42,7 +43,8 @@ struct compute_inverse<MatrixType, ResultType, 1>
static inline void run(const MatrixType& matrix, ResultType& result)
{
typedef typename MatrixType::Scalar Scalar;
- result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0);
+ typename internal::evaluator<MatrixType>::type matrixEval(matrix);
+ result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0);
}
};
@@ -75,10 +77,10 @@ inline void compute_inverse_size2_helper(
const MatrixType& matrix, const typename ResultType::Scalar& invdet,
ResultType& result)
{
- result.coeffRef(0,0) = matrix.coeff(1,1) * invdet;
+ result.coeffRef(0,0) = matrix.coeff(1,1) * invdet;
result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
- result.coeffRef(1,1) = matrix.coeff(0,0) * invdet;
+ result.coeffRef(1,1) = matrix.coeff(0,0) * invdet;
}
template<typename MatrixType, typename ResultType>
@@ -279,41 +281,33 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
*** MatrixBase methods ***
*************************/
-template<typename MatrixType>
-struct traits<inverse_impl<MatrixType> >
-{
- typedef typename MatrixType::PlainObject ReturnType;
-};
-
-template<typename MatrixType>
-struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
-{
- typedef typename MatrixType::Index Index;
- typedef typename internal::eval<MatrixType>::type MatrixTypeNested;
- typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
- MatrixTypeNested m_matrix;
-
- EIGEN_DEVICE_FUNC
- inverse_impl(const MatrixType& matrix)
- : m_matrix(matrix)
- {}
+} // end namespace internal
- EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
- EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+namespace internal {
- template<typename Dest>
- EIGEN_DEVICE_FUNC
- inline void evalTo(Dest& dst) const
+// Specialization for "dense = dense_xpr.inverse()"
+template<typename DstXprType, typename XprType, typename Scalar>
+struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef Inverse<XprType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
{
- const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime);
+ // FIXME shall we resize dst here?
+ const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
EIGEN_ONLY_USED_FOR_DEBUG(Size);
- eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst)))
+ eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
&& "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");
- compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst);
+ typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type ActualXprType;
+ typedef typename internal::remove_all<ActualXprType>::type ActualXprTypeCleanded;
+
+ ActualXprType actual_xpr(src.nestedExpression());
+
+ compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);
}
};
+
} // end namespace internal
/** \lu_module
@@ -334,11 +328,11 @@ struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
* \sa computeInverseAndDetWithCheck()
*/
template<typename Derived>
-inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const
+inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
{
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
eigen_assert(rows() == cols());
- return internal::inverse_impl<Derived>(derived());
+ return Inverse<Derived>(derived());
}
/** \lu_module
@@ -374,7 +368,7 @@ inline void MatrixBase<Derived>::computeInverseAndDetWithCheck(
// for larger sizes, evaluating has negligible cost and limits code size.
typedef typename internal::conditional<
RowsAtCompileTime == 2,
- typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type,
+ typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type,
PlainObject
>::type MatrixType;
internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run
@@ -414,4 +408,4 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
} // end namespace Eigen
-#endif // EIGEN_INVERSE_H
+#endif // EIGEN_INVERSE_IMPL_H
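
The aliasing assertion preserved in the new Assignment specialization still guards the small fixed-size kernels; a sketch of the classic symptom and workaround:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  Matrix3d A = Matrix3d::Random();
  // A = A.inverse();        // trips "Aliasing problem detected in inverse()"
  A = A.inverse().eval();    // evaluate into a temporary first
  return A.allFinite() ? 0 : 1;
}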
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 2f65c3a49..d04e4191b 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -13,6 +13,19 @@
namespace Eigen {
+namespace internal {
+template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+ typedef traits<_MatrixType> BaseTraits;
+ enum {
+ Flags = BaseTraits::Flags & RowMajorBit,
+ CoeffReadCost = Dynamic
+ };
+};
+
+} // end namespace internal
+
/** \ingroup LU_Module
*
* \class PartialPivLU
@@ -62,6 +75,7 @@ template<typename _MatrixType> class PartialPivLU
typedef typename MatrixType::Index Index;
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
+ typedef typename MatrixType::PlainObject PlainObject;
/**
@@ -78,7 +92,7 @@ template<typename _MatrixType> class PartialPivLU
* according to the specified problem \a size.
* \sa PartialPivLU()
*/
- PartialPivLU(Index size);
+ explicit PartialPivLU(Index size);
/** Constructor.
*
@@ -87,7 +101,7 @@ template<typename _MatrixType> class PartialPivLU
* \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
* If you need to deal with non-full rank, use class FullPivLU instead.
*/
- PartialPivLU(const MatrixType& matrix);
+ explicit PartialPivLU(const MatrixType& matrix);
PartialPivLU& compute(const MatrixType& matrix);
@@ -129,11 +143,11 @@ template<typename _MatrixType> class PartialPivLU
* \sa TriangularView::solve(), inverse(), computeInverse()
*/
template<typename Rhs>
- inline const internal::solve_retval<PartialPivLU, Rhs>
+ inline const Solve<PartialPivLU, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
- return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived());
+ return Solve<PartialPivLU, Rhs>(*this, b.derived());
}
/** \returns the inverse of the matrix of which *this is the LU decomposition.
@@ -143,11 +157,10 @@ template<typename _MatrixType> class PartialPivLU
*
* \sa MatrixBase::inverse(), LU::inverse()
*/
- inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const
+ inline const Inverse<PartialPivLU> inverse() const
{
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
- return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType>
- (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+ return Inverse<PartialPivLU>(*this);
}
/** \returns the determinant of the matrix of which
@@ -169,6 +182,30 @@ template<typename _MatrixType> class PartialPivLU
inline Index rows() const { return m_lu.rows(); }
inline Index cols() const { return m_lu.cols(); }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const {
+ /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+ * So we proceed as follows:
+ * Step 1: compute c = Pb.
+ * Step 2: replace c by the solution x to Lx = c.
+ * Step 3: replace c by the solution x to Ux = c.
+ */
+
+ eigen_assert(rhs.rows() == m_lu.rows());
+
+ // Step 1
+ dst = permutationP() * rhs;
+
+ // Step 2
+ m_lu.template triangularView<UnitLower>().solveInPlace(dst);
+
+ // Step 3
+ m_lu.template triangularView<Upper>().solveInPlace(dst);
+ }
+ #endif
protected:
MatrixType m_lu;
@@ -434,34 +471,17 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
namespace internal {
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<PartialPivLU<_MatrixType>, Rhs>
- : solve_retval_base<PartialPivLU<_MatrixType>, Rhs>
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
{
- EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
- * So we proceed as follows:
- * Step 1: compute c = Pb.
- * Step 2: replace c by the solution x to Lx = c.
- * Step 3: replace c by the solution x to Ux = c.
- */
-
- eigen_assert(rhs().rows() == dec().matrixLU().rows());
-
- // Step 1
- dst = dec().permutationP() * rhs();
-
- // Step 2
- dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst);
-
- // Step 3
- dec().matrixLU().template triangularView<Upper>().solveInPlace(dst);
+ typedef PartialPivLU<MatrixType> LuType;
+ typedef Inverse<LuType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};
-
} // end namespace internal
/******** MatrixBase methods *******/
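
As with FullPivLU, inverse() now returns a lazy Inverse<> expression. A sketch, assuming a square, invertible MatrixXd A:

PartialPivLU<MatrixXd> lu(A);
MatrixXd Ainv = lu.inverse();  // only builds an Inverse<PartialPivLU<...> > node;
                               // on assignment, the specialization above runs
                               // lu.solve(MatrixXd::Identity(n, n))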
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index 60b7a2376..1f62ef14e 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
ResultAlignment = bool(ResultType::Flags&AlignedBit),
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
+ typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
- static void run(const MatrixType& matrix, ResultType& result)
+ static void run(const MatrixType& mat, ResultType& result)
{
+ ActualMatrixType matrix(mat);
EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
// Load the full matrix into registers
@@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
ResultAlignment = bool(ResultType::Flags&AlignedBit),
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
- static void run(const MatrixType& matrix, ResultType& result)
+ typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+
+ static void run(const MatrixType& mat, ResultType& result)
{
+ ActualMatrixType matrix(mat);
const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
// The inverse is calculated using "Divide and Conquer" technique. The
// original matrix is divided into four 2x2 sub-matrices. Since each
- // register of the matrix holds two element, the smaller matrices are
+ // register of the matrix holds two elements, the smaller matrices
// consist of two registers. Hence we get better locality for the
// calculations.
diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h
index 41b4fd7e3..ce7c0bbf3 100644
--- a/Eigen/src/OrderingMethods/Amd.h
+++ b/Eigen/src/OrderingMethods/Amd.h
@@ -106,7 +106,8 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, Permutation
t = cnz + cnz/5 + 2*n; /* add elbow room to C */
C.resizeNonZeros(t);
- Index* W = new Index[8*(n+1)]; /* get workspace */
+ // get workspace
+ ei_declare_aligned_stack_constructed_variable(Index,W,8*(n+1),0);
Index* len = W;
Index* nv = W + (n+1);
Index* next = W + 2*(n+1);
@@ -424,8 +425,6 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, Permutation
}
perm.indices().conservativeResize(n);
-
- delete[] W;
}
} // namespace internal
diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h
index 8a546dc2f..a96c27695 100644
--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -125,9 +125,15 @@ namespace internal
// This is the base class to interface with PaStiX functions.
// Users should not use this class directly.
template <class Derived>
-class PastixBase : internal::noncopyable
+class PastixBase : public SparseSolverBase<Derived>
{
+ protected:
+ typedef SparseSolverBase<Derived> Base;
+ using Base::derived;
+ using Base::m_isInitialized;
public:
+ using Base::_solve_impl;
+
typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
@@ -138,7 +144,7 @@ class PastixBase : internal::noncopyable
public:
- PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0)
+ PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_pastixdata(0), m_size(0)
{
init();
}
@@ -147,33 +153,10 @@ class PastixBase : internal::noncopyable
{
clean();
}
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<PastixBase, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "Pastix solver is not initialized.");
- eigen_assert(rows()==b.rows()
- && "PastixBase::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<PastixBase, Rhs>(*this, b.derived());
- }
template<typename Rhs,typename Dest>
- bool _solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;
+ bool _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;
- Derived& derived()
- {
- return *static_cast<Derived*>(this);
- }
- const Derived& derived() const
- {
- return *static_cast<const Derived*>(this);
- }
-
/** Returns a reference to the integer vector IPARM of PaStiX parameters
* to modify the default parameters.
* The statistics related to the different phases of factorization and solve are saved here as well
@@ -228,20 +211,6 @@ class PastixBase : internal::noncopyable
return m_info;
}
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<PastixBase, Rhs>
- solve(const SparseMatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "Pastix LU, LLT or LDLT is not initialized.");
- eigen_assert(rows()==b.rows()
- && "PastixBase::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<PastixBase, Rhs>(*this, b.derived());
- }
-
protected:
// Initialize the Pastix data structure, check the matrix
@@ -268,7 +237,6 @@ class PastixBase : internal::noncopyable
int m_initisOk;
int m_analysisIsOk;
int m_factorizationIsOk;
- bool m_isInitialized;
mutable ComputationInfo m_info;
mutable pastix_data_t *m_pastixdata; // Data structure for pastix
mutable int m_comm; // The MPI communicator identifier
@@ -328,7 +296,6 @@ void PastixBase<Derived>::compute(ColSpMatrix& mat)
factorize(mat);
m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
- m_isInitialized = m_factorizationIsOk;
}
@@ -393,7 +360,7 @@ void PastixBase<Derived>::factorize(ColSpMatrix& mat)
/* Solve the system */
template<typename Base>
template<typename Rhs,typename Dest>
-bool PastixBase<Base>::_solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const
+bool PastixBase<Base>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const
{
eigen_assert(m_isInitialized && "The matrix should be factorized first");
EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
@@ -450,7 +417,7 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
init();
}
- PastixLU(const MatrixType& matrix):Base()
+ explicit PastixLU(const MatrixType& matrix):Base()
{
init();
compute(matrix);
@@ -560,7 +527,7 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
init();
}
- PastixLLT(const MatrixType& matrix):Base()
+ explicit PastixLLT(const MatrixType& matrix):Base()
{
init();
compute(matrix);
@@ -641,7 +608,7 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
init();
}
- PastixLDLT(const MatrixType& matrix):Base()
+ explicit PastixLDLT(const MatrixType& matrix):Base()
{
init();
compute(matrix);
@@ -694,36 +661,6 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
}
};
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<PastixBase<_MatrixType>, Rhs>
- : solve_retval_base<PastixBase<_MatrixType>, Rhs>
-{
- typedef PastixBase<_MatrixType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-template<typename _MatrixType, typename Rhs>
-struct sparse_solve_retval<PastixBase<_MatrixType>, Rhs>
- : sparse_solve_retval_base<PastixBase<_MatrixType>, Rhs>
-{
- typedef PastixBase<_MatrixType> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif
diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h
index b6571069e..054af6635 100644
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -96,10 +96,17 @@ namespace internal
}
template<class Derived>
-class PardisoImpl : internal::noncopyable
+class PardisoImpl : public SparseSolverBase<Derived>
{
+ protected:
+ typedef SparseSolverBase<Derived> Base;
+ using Base::derived;
+ using Base::m_isInitialized;
+
typedef internal::pardiso_traits<Derived> Traits;
public:
+ using Base::_solve_impl;
+
typedef typename Traits::MatrixType MatrixType;
typedef typename Traits::Scalar Scalar;
typedef typename Traits::RealScalar RealScalar;
@@ -118,7 +125,7 @@ class PardisoImpl : internal::noncopyable
eigen_assert((sizeof(Index) >= sizeof(_INTEGER_t) && sizeof(Index) <= 8) && "Non-supported index type");
m_iparm.setZero();
m_msglvl = 0; // No output
- m_initialized = false;
+ m_isInitialized = false;
}
~PardisoImpl()
@@ -136,7 +143,7 @@ class PardisoImpl : internal::noncopyable
*/
ComputationInfo info() const
{
- eigen_assert(m_initialized && "Decomposition is not initialized.");
+ eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return m_info;
}
@@ -165,51 +172,14 @@ class PardisoImpl : internal::noncopyable
Derived& factorize(const MatrixType& matrix);
Derived& compute(const MatrixType& matrix);
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<PardisoImpl, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_initialized && "Pardiso solver is not initialized.");
- eigen_assert(rows()==b.rows()
- && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<PardisoImpl, Rhs>(*this, b.derived());
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<PardisoImpl, Rhs>
- solve(const SparseMatrixBase<Rhs>& b) const
- {
- eigen_assert(m_initialized && "Pardiso solver is not initialized.");
- eigen_assert(rows()==b.rows()
- && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<PardisoImpl, Rhs>(*this, b.derived());
- }
-
- Derived& derived()
- {
- return *static_cast<Derived*>(this);
- }
- const Derived& derived() const
- {
- return *static_cast<const Derived*>(this);
- }
template<typename BDerived, typename XDerived>
- bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
+ bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
protected:
void pardisoRelease()
{
- if(m_initialized) // Factorization ran at least once
+ if(m_isInitialized) // Factorization ran at least once
{
internal::pardiso_run_selector<Index>::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0,
m_iparm.data(), m_msglvl, 0, 0);
@@ -270,7 +240,7 @@ class PardisoImpl : internal::noncopyable
mutable SparseMatrixType m_matrix;
ComputationInfo m_info;
- bool m_initialized, m_analysisIsOk, m_factorizationIsOk;
+ bool m_analysisIsOk, m_factorizationIsOk;
Index m_type, m_msglvl;
mutable void *m_pt[64];
mutable ParameterType m_iparm;
@@ -298,7 +268,7 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a)
manageErrorCode(error);
m_analysisIsOk = true;
m_factorizationIsOk = true;
- m_initialized = true;
+ m_isInitialized = true;
return derived();
}
@@ -321,7 +291,7 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)
manageErrorCode(error);
m_analysisIsOk = true;
m_factorizationIsOk = false;
- m_initialized = true;
+ m_isInitialized = true;
return derived();
}
@@ -345,7 +315,7 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a)
template<class Base>
template<typename BDerived,typename XDerived>
-bool PardisoImpl<Base>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
+bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
{
if(m_iparm[0] == 0) // Factorization was not computed
return false;
@@ -421,7 +391,7 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
pardisoInit(Base::ScalarIsComplex ? 13 : 11);
}
- PardisoLU(const MatrixType& matrix)
+ explicit PardisoLU(const MatrixType& matrix)
: Base()
{
pardisoInit(Base::ScalarIsComplex ? 13 : 11);
@@ -472,7 +442,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
pardisoInit(Base::ScalarIsComplex ? 4 : 2);
}
- PardisoLLT(const MatrixType& matrix)
+ explicit PardisoLLT(const MatrixType& matrix)
: Base()
{
pardisoInit(Base::ScalarIsComplex ? 4 : 2);
@@ -530,7 +500,7 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2);
}
- PardisoLDLT(const MatrixType& matrix)
+ explicit PardisoLDLT(const MatrixType& matrix)
: Base()
{
pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2);
@@ -546,36 +516,6 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
}
};
-namespace internal {
-
-template<typename _Derived, typename Rhs>
-struct solve_retval<PardisoImpl<_Derived>, Rhs>
- : solve_retval_base<PardisoImpl<_Derived>, Rhs>
-{
- typedef PardisoImpl<_Derived> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-template<typename Derived, typename Rhs>
-struct sparse_solve_retval<PardisoImpl<Derived>, Rhs>
- : sparse_solve_retval_base<PardisoImpl<Derived>, Rhs>
-{
- typedef PardisoImpl<Derived> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_PARDISOSUPPORT_H
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 4824880f5..370cb69e3 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -13,6 +13,15 @@
namespace Eigen {
+namespace internal {
+template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
+ : traits<_MatrixType>
+{
+ enum { Flags = 0 };
+};
+
+} // end namespace internal
+
/** \ingroup QR_Module
*
* \class ColPivHouseholderQR
@@ -56,6 +65,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
+ typedef typename MatrixType::PlainObject PlainObject;
private:
@@ -107,7 +117,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
*
* \sa compute()
*/
- ColPivHouseholderQR(const MatrixType& matrix)
+ explicit ColPivHouseholderQR(const MatrixType& matrix)
: m_qr(matrix.rows(), matrix.cols()),
m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
m_colsPermutation(PermIndexType(matrix.cols())),
@@ -138,15 +148,15 @@ template<typename _MatrixType> class ColPivHouseholderQR
* Output: \verbinclude ColPivHouseholderQR_solve.out
*/
template<typename Rhs>
- inline const internal::solve_retval<ColPivHouseholderQR, Rhs>
+ inline const Solve<ColPivHouseholderQR, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
- return internal::solve_retval<ColPivHouseholderQR, Rhs>(*this, b.derived());
+ return Solve<ColPivHouseholderQR, Rhs>(*this, b.derived());
}
- HouseholderSequenceType householderQ(void) const;
- HouseholderSequenceType matrixQ(void) const
+ HouseholderSequenceType householderQ() const;
+ HouseholderSequenceType matrixQ() const
{
return householderQ();
}
@@ -284,13 +294,10 @@ template<typename _MatrixType> class ColPivHouseholderQR
* \note If this matrix is not invertible, the returned matrix has undefined coefficients.
* Use isInvertible() to first determine whether this matrix is invertible.
*/
- inline const
- internal::solve_retval<ColPivHouseholderQR, typename MatrixType::IdentityReturnType>
- inverse() const
+ inline const Inverse<ColPivHouseholderQR> inverse() const
{
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
- return internal::solve_retval<ColPivHouseholderQR,typename MatrixType::IdentityReturnType>
- (*this, MatrixType::Identity(m_qr.rows(), m_qr.cols()));
+ return Inverse<ColPivHouseholderQR>(*this);
}
inline Index rows() const { return m_qr.rows(); }
@@ -382,6 +389,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return Success;
}
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
MatrixType m_qr;
@@ -463,20 +476,10 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
// we store that back into our table: it can't hurt to correct our table.
m_colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;
- // if the current biggest column is smaller than epsilon times the initial biggest column,
- // terminate to avoid generating nan/inf values.
- // Note that here, if we test instead for "biggest == 0", we get a failure every 1000 (or so)
- // repetitions of the unit test, with the result of solve() filled with large values of the order
- // of 1/(size*epsilon).
- if(biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
- {
+ // Track the number of meaningful pivots but do not stop the decomposition to make
+ // sure that the initial matrix is properly reproduced. See bug 941.
+ if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
m_nonzero_pivots = k;
- m_hCoeffs.tail(size-k).setZero();
- m_qr.bottomRightCorner(rows-k,cols-k)
- .template triangularView<StrictlyLower>()
- .setZero();
- break;
- }
// apply the transposition to the columns
m_colsTranspositions.coeffRef(k) = biggest_col_index;
@@ -505,7 +508,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
}
m_colsPermutation.setIdentity(PermIndexType(cols));
- for(PermIndexType k = 0; k < m_nonzero_pivots; ++k)
+ for(PermIndexType k = 0; k < size/*m_nonzero_pivots*/; ++k)
m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));
m_det_pq = (number_of_transpositions%2) ? -1 : 1;
@@ -514,54 +517,62 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
return *this;
}
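The behavioral consequence of the change above (bug 941) is that the factorization now always reproduces the input matrix, even when it is rank deficient; only nonzeroPivots()/rank() reflect the numerical rank. A hedged check of that property (values illustrative):

    #include <Eigen/Dense>
    #include <Eigen/QR>
    using namespace Eigen;

    MatrixXd A(4, 4);
    A << 1, 2, 3, 6,
         2, 4, 1, 7,
         3, 6, 2, 11,
         4, 8, 5, 17;                 // column 1 is twice column 0, so A is rank deficient
    ColPivHouseholderQR<MatrixXd> qr(A);
    MatrixXd R  = qr.matrixQR().triangularView<Upper>();
    MatrixXd QR = qr.householderQ() * R;                // Q * R
    MatrixXd A2 = QR * qr.colsPermutation().inverse();  // undo the column permutation
    // With the early exit removed, A2 matches A to machine precision even though rank() < 4.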
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
- : solve_retval_base<ColPivHouseholderQR<_MatrixType>, Rhs>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
- EIGEN_MAKE_SOLVE_HELPERS(ColPivHouseholderQR<_MatrixType>,Rhs)
+ eigen_assert(rhs.rows() == rows());
+
+ const Index nonzero_pivots = nonzeroPivots();
- template<typename Dest> void evalTo(Dest& dst) const
+ if(nonzero_pivots == 0)
{
- eigen_assert(rhs().rows() == dec().rows());
+ dst.setZero();
+ return;
+ }
- const Index cols = dec().cols(),
- nonzero_pivots = dec().nonzeroPivots();
+ typename RhsType::PlainObject c(rhs);
- if(nonzero_pivots == 0)
- {
- dst.setZero();
- return;
- }
+ // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
+ c.applyOnTheLeft(householderSequence(m_qr, m_hCoeffs)
+ .setLength(nonzero_pivots)
+ .transpose()
+ );
- typename Rhs::PlainObject c(rhs());
+ m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)
+ .template triangularView<Upper>()
+ .solveInPlace(c.topRows(nonzero_pivots));
- // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
- c.applyOnTheLeft(householderSequence(dec().matrixQR(), dec().hCoeffs())
- .setLength(dec().nonzeroPivots())
- .transpose()
- );
+ for(Index i = 0; i < nonzero_pivots; ++i) dst.row(m_colsPermutation.indices().coeff(i)) = c.row(i);
+ for(Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero();
+}
+#endif
- dec().matrixR()
- .topLeftCorner(nonzero_pivots, nonzero_pivots)
- .template triangularView<Upper>()
- .solveInPlace(c.topRows(nonzero_pivots));
+namespace internal {
- for(Index i = 0; i < nonzero_pivots; ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i);
- for(Index i = nonzero_pivots; i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero();
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef ColPivHouseholderQR<MatrixType> QrType;
+ typedef Inverse<QrType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};
} // end namespace internal
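The Assignment specialization above is what makes the new expression-based inverse() work: nothing is computed until the Inverse<> expression is assigned to a dense destination, at which point it is materialized through solve() against the identity. A minimal sketch of the equivalence (sizes illustrative):

    #include <Eigen/Dense>
    #include <Eigen/QR>
    using namespace Eigen;

    MatrixXd A = MatrixXd::Random(4, 4);
    ColPivHouseholderQR<MatrixXd> qr(A);
    MatrixXd Ainv1 = qr.inverse();                        // triggers the Assignment above
    MatrixXd Ainv2 = qr.solve(MatrixXd::Identity(4, 4));  // what it expands to
    // Ainv1 and Ainv2 are computed the same way.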
-/** \returns the matrix Q as a sequence of householder transformations */
+/** \returns the matrix Q as a sequence of householder transformations.
+ * You can extract the meaningful part only by using:
+ * \code qr.householderQ().setLength(qr.nonzeroPivots()) \endcode */
template<typename MatrixType>
typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHouseholderQR<MatrixType>
::householderQ() const
{
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
- return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()).setLength(m_nonzero_pivots);
+ return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
}
#ifndef __CUDACC__
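Since householderQ() no longer truncates the sequence to m_nonzero_pivots, callers who want only the meaningful reflectors now opt in via setLength(), as the updated documentation suggests. A short sketch (sizes illustrative):

    #include <Eigen/Dense>
    #include <Eigen/QR>
    using namespace Eigen;

    MatrixXd A = MatrixXd::Random(6, 4);
    ColPivHouseholderQR<MatrixXd> qr(A);
    MatrixXd Qfull = qr.householderQ();                               // all reflectors
    MatrixXd Qrank = qr.householderQ().setLength(qr.nonzeroPivots()); // meaningful part only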
diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h
index b5b198326..7b6ba0a5e 100644
--- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h
+++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h
@@ -49,7 +49,6 @@ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynami
{ \
using std::abs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
- typedef MatrixType::Scalar Scalar; \
typedef MatrixType::RealScalar RealScalar; \
Index rows = matrix.rows();\
Index cols = matrix.cols();\
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index a7b0fc16f..5712d175c 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -15,6 +15,12 @@ namespace Eigen {
namespace internal {
+template<typename _MatrixType> struct traits<FullPivHouseholderQR<_MatrixType> >
+ : traits<_MatrixType>
+{
+ enum { Flags = 0 };
+};
+
template<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType;
template<typename MatrixType>
@@ -23,7 +29,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
typedef typename MatrixType::PlainObject ReturnType;
};
-}
+} // end namespace internal
/** \ingroup QR_Module
*
@@ -69,6 +75,7 @@ template<typename _MatrixType> class FullPivHouseholderQR
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef typename internal::plain_col_type<MatrixType>::type ColVectorType;
+ typedef typename MatrixType::PlainObject PlainObject;
/** \brief Default Constructor.
*
@@ -113,7 +120,7 @@ template<typename _MatrixType> class FullPivHouseholderQR
*
* \sa compute()
*/
- FullPivHouseholderQR(const MatrixType& matrix)
+ explicit FullPivHouseholderQR(const MatrixType& matrix)
: m_qr(matrix.rows(), matrix.cols()),
m_hCoeffs((std::min)(matrix.rows(), matrix.cols())),
m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())),
@@ -145,11 +152,11 @@ template<typename _MatrixType> class FullPivHouseholderQR
* Output: \verbinclude FullPivHouseholderQR_solve.out
*/
template<typename Rhs>
- inline const internal::solve_retval<FullPivHouseholderQR, Rhs>
+ inline const Solve<FullPivHouseholderQR, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
- return internal::solve_retval<FullPivHouseholderQR, Rhs>(*this, b.derived());
+ return Solve<FullPivHouseholderQR, Rhs>(*this, b.derived());
}
/** \returns Expression object representing the matrix Q
@@ -280,13 +287,11 @@ template<typename _MatrixType> class FullPivHouseholderQR
*
* \note If this matrix is not invertible, the returned matrix has undefined coefficients.
* Use isInvertible() to first determine whether this matrix is invertible.
- */ inline const
- internal::solve_retval<FullPivHouseholderQR, typename MatrixType::IdentityReturnType>
- inverse() const
+ */
+ inline const Inverse<FullPivHouseholderQR> inverse() const
{
eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
- return internal::solve_retval<FullPivHouseholderQR,typename MatrixType::IdentityReturnType>
- (*this, MatrixType::Identity(m_qr.rows(), m_qr.cols()));
+ return Inverse<FullPivHouseholderQR>(*this);
}
inline Index rows() const { return m_qr.rows(); }
@@ -366,6 +371,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
* diagonal coefficient of U.
*/
RealScalar maxPivot() const { return m_maxpivot; }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
MatrixType m_qr;
@@ -485,46 +496,53 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
return *this;
}
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs>
- : solve_retval_base<FullPivHouseholderQR<_MatrixType>, Rhs>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
{
- EIGEN_MAKE_SOLVE_HELPERS(FullPivHouseholderQR<_MatrixType>,Rhs)
+ eigen_assert(rhs.rows() == rows());
+ const Index l_rank = rank();
- template<typename Dest> void evalTo(Dest& dst) const
+ // FIXME introduce nonzeroPivots() and use it here. and more generally,
+ // make the same improvements in this dec as in FullPivLU.
+ if(l_rank==0)
{
- const Index rows = dec().rows(), cols = dec().cols();
- eigen_assert(rhs().rows() == rows);
+ dst.setZero();
+ return;
+ }
- // FIXME introduce nonzeroPivots() and use it here. and more generally,
- // make the same improvements in this dec as in FullPivLU.
- if(dec().rank()==0)
- {
- dst.setZero();
- return;
- }
+ typename RhsType::PlainObject c(rhs);
- typename Rhs::PlainObject c(rhs());
+ Matrix<Scalar,1,RhsType::ColsAtCompileTime> temp(rhs.cols());
+ for (Index k = 0; k < l_rank; ++k)
+ {
+ Index remainingSize = rows()-k;
+ c.row(k).swap(c.row(m_rows_transpositions.coeff(k)));
+ c.bottomRightCorner(remainingSize, rhs.cols())
+ .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingSize-1),
+ m_hCoeffs.coeff(k), &temp.coeffRef(0));
+ }
- Matrix<Scalar,1,Rhs::ColsAtCompileTime> temp(rhs().cols());
- for (Index k = 0; k < dec().rank(); ++k)
- {
- Index remainingSize = rows-k;
- c.row(k).swap(c.row(dec().rowsTranspositions().coeff(k)));
- c.bottomRightCorner(remainingSize, rhs().cols())
- .applyHouseholderOnTheLeft(dec().matrixQR().col(k).tail(remainingSize-1),
- dec().hCoeffs().coeff(k), &temp.coeffRef(0));
- }
+ m_qr.topLeftCorner(l_rank, l_rank)
+ .template triangularView<Upper>()
+ .solveInPlace(c.topRows(l_rank));
- dec().matrixQR()
- .topLeftCorner(dec().rank(), dec().rank())
- .template triangularView<Upper>()
- .solveInPlace(c.topRows(dec().rank()));
+ for(Index i = 0; i < l_rank; ++i) dst.row(m_cols_permutation.indices().coeff(i)) = c.row(i);
+ for(Index i = l_rank; i < cols(); ++i) dst.row(m_cols_permutation.indices().coeff(i)).setZero();
+}
+#endif
- for(Index i = 0; i < dec().rank(); ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i);
- for(Index i = dec().rank(); i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero();
+namespace internal {
+
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+ typedef FullPivHouseholderQR<MatrixType> QrType;
+ typedef Inverse<QrType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};
@@ -550,7 +568,7 @@ public:
: m_qr(qr),
m_hCoeffs(hCoeffs),
m_rowsTranspositions(rowsTranspositions)
- {}
+ {}
template <typename ResultType>
void evalTo(ResultType& result) const
@@ -580,8 +598,8 @@ public:
}
}
- Index rows() const { return m_qr.rows(); }
- Index cols() const { return m_qr.rows(); }
+ Index rows() const { return m_qr.rows(); }
+ Index cols() const { return m_qr.rows(); }
protected:
typename MatrixType::Nested m_qr;
@@ -589,6 +607,11 @@ protected:
typename IntDiagSizeVectorType::Nested m_rowsTranspositions;
};
+// template<typename MatrixType>
+// struct evaluator<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
+// : public evaluator<ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> > >
+// {};
+
} // end namespace internal
template<typename MatrixType>
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 352dbf3f0..f22008494 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -91,7 +91,7 @@ template<typename _MatrixType> class HouseholderQR
*
* \sa compute()
*/
- HouseholderQR(const MatrixType& matrix)
+ explicit HouseholderQR(const MatrixType& matrix)
: m_qr(matrix.rows(), matrix.cols()),
m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
m_temp(matrix.cols()),
@@ -118,11 +118,11 @@ template<typename _MatrixType> class HouseholderQR
* Output: \verbinclude HouseholderQR_solve.out
*/
template<typename Rhs>
- inline const internal::solve_retval<HouseholderQR, Rhs>
+ inline const Solve<HouseholderQR, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
- return internal::solve_retval<HouseholderQR, Rhs>(*this, b.derived());
+ return Solve<HouseholderQR, Rhs>(*this, b.derived());
}
/** This method returns an expression of the unitary matrix Q as a sequence of Householder transformations.
@@ -187,6 +187,12 @@ template<typename _MatrixType> class HouseholderQR
* For advanced uses only.
*/
const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
MatrixType m_qr;
@@ -283,8 +289,8 @@ struct householder_qr_inplace_blocked
for (k = 0; k < size; k += blockSize)
{
Index bs = (std::min)(size-k,blockSize); // actual size of the block
- Index tcols = cols - k - bs; // trailing columns
- Index brows = rows-k; // rows of the block
+ Index tcols = cols - k - bs; // trailing columns
+ Index brows = rows-k; // rows of the block
// partition the matrix:
// A00 | A01 | A02
@@ -302,43 +308,38 @@ struct householder_qr_inplace_blocked
if(tcols)
{
BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
- apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
+ apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward
}
}
}
};
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<HouseholderQR<_MatrixType>, Rhs>
- : solve_retval_base<HouseholderQR<_MatrixType>, Rhs>
-{
- EIGEN_MAKE_SOLVE_HELPERS(HouseholderQR<_MatrixType>,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- const Index rows = dec().rows(), cols = dec().cols();
- const Index rank = (std::min)(rows, cols);
- eigen_assert(rhs().rows() == rows);
+} // end namespace internal
- typename Rhs::PlainObject c(rhs());
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+ const Index rank = (std::min)(rows(), cols());
+ eigen_assert(rhs.rows() == rows());
- // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
- c.applyOnTheLeft(householderSequence(
- dec().matrixQR().leftCols(rank),
- dec().hCoeffs().head(rank)).transpose()
- );
+ typename RhsType::PlainObject c(rhs);
- dec().matrixQR()
- .topLeftCorner(rank, rank)
- .template triangularView<Upper>()
- .solveInPlace(c.topRows(rank));
+ // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
+ c.applyOnTheLeft(householderSequence(
+ m_qr.leftCols(rank),
+ m_hCoeffs.head(rank)).transpose()
+ );
- dst.topRows(rank) = c.topRows(rank);
- dst.bottomRows(cols-rank).setZero();
- }
-};
+ m_qr.topLeftCorner(rank, rank)
+ .template triangularView<Upper>()
+ .solveInPlace(c.topRows(rank));
-} // end namespace internal
+ dst.topRows(rank) = c.topRows(rank);
+ dst.bottomRows(cols()-rank).setZero();
+}
+#endif
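The reworked _solve_impl above is the classic QR least-squares recipe: form c = Q^T b with the rank leading reflectors, back-substitute against the top-left triangular factor, and zero the trailing entries. A minimal usage sketch (sizes illustrative):

    #include <Eigen/Dense>
    #include <Eigen/QR>
    using namespace Eigen;

    MatrixXd A = MatrixXd::Random(5, 3);
    VectorXd b = VectorXd::Random(5);
    HouseholderQR<MatrixXd> qr(A);
    VectorXd x = qr.solve(b);    // minimizes |Ax - b| for full column rank A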
/** Performs the QR factorization of the given matrix \a matrix. The result of
* the factorization is stored into \c *this, and a reference to \c *this
diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
index a2cc2a9e2..54a1b21b8 100644
--- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
+++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -54,29 +55,26 @@ namespace Eigen {
*
*/
template<typename _MatrixType>
-class SPQR
+class SPQR : public SparseSolverBase<SPQR<_MatrixType> >
{
+ protected:
+ typedef SparseSolverBase<SPQR<_MatrixType> > Base;
+ using Base::m_isInitialized;
public:
typedef typename _MatrixType::Scalar Scalar;
typedef typename _MatrixType::RealScalar RealScalar;
typedef UF_long Index ;
typedef SparseMatrix<Scalar, ColMajor, Index> MatrixType;
- typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
+ typedef Map<PermutationMatrix<Dynamic, Dynamic, Index> > PermutationType;
public:
SPQR()
- : m_isInitialized(false),
- m_ordering(SPQR_ORDERING_DEFAULT),
- m_allow_tol(SPQR_DEFAULT_TOL),
- m_tolerance (NumTraits<Scalar>::epsilon())
+ : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon()), m_useDefaultThreshold(true)
{
cholmod_l_start(&m_cc);
}
- SPQR(const _MatrixType& matrix)
- : m_isInitialized(false),
- m_ordering(SPQR_ORDERING_DEFAULT),
- m_allow_tol(SPQR_DEFAULT_TOL),
- m_tolerance (NumTraits<Scalar>::epsilon())
+ explicit SPQR(const _MatrixType& matrix)
+ : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon()), m_useDefaultThreshold(true)
{
cholmod_l_start(&m_cc);
compute(matrix);
@@ -101,10 +99,25 @@ class SPQR
if(m_isInitialized) SPQR_free();
MatrixType mat(matrix);
+
+ /* Compute the default threshold as in MatLab, see:
+ * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing
+ * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3
+ */
+ RealScalar pivotThreshold = m_tolerance;
+ if(m_useDefaultThreshold)
+ {
+ RealScalar max2Norm = 0.0;
+ for (int j = 0; j < mat.cols(); j++) max2Norm = numext::maxi(max2Norm, mat.col(j).norm());
+ if(max2Norm==RealScalar(0))
+ max2Norm = RealScalar(1);
+ pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits<RealScalar>::epsilon();
+ }
+
cholmod_sparse A;
A = viewAsCholmod(mat);
Index col = matrix.cols();
- m_rank = SuiteSparseQR<Scalar>(m_ordering, m_tolerance, col, &A,
+ m_rank = SuiteSparseQR<Scalar>(m_ordering, pivotThreshold, col, &A,
&m_cR, &m_E, &m_H, &m_HPinv, &m_HTau, &m_cc);
if (!m_cR)
@@ -120,41 +133,37 @@ class SPQR
/**
* Get the number of rows of the input matrix and the Q matrix
*/
- inline Index rows() const {return m_H->nrow; }
+ inline Index rows() const {return m_cR->nrow; }
/**
* Get the number of columns of the input matrix.
*/
inline Index cols() const { return m_cR->ncol; }
-
- /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<SPQR, Rhs> solve(const MatrixBase<Rhs>& B) const
- {
- eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
- eigen_assert(this->rows()==B.rows()
- && "SPQR::solve(): invalid number of rows of the right hand side matrix B");
- return internal::solve_retval<SPQR, Rhs>(*this, B.derived());
- }
template<typename Rhs, typename Dest>
- void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
{
eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
eigen_assert(b.cols()==1 && "This method is for vectors only");
-
+
//Compute Q^T * b
- typename Dest::PlainObject y;
+ typename Dest::PlainObject y, y2;
y = matrixQ().transpose() * b;
- // Solves with the triangular matrix R
+
+ // Solves with the triangular matrix R
Index rk = this->rank();
- y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y.topRows(rk));
- y.bottomRows(cols()-rk).setZero();
+ y2 = y;
+ y.resize((std::max)(cols(),Index(y.rows())),y.cols());
+ y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y2.topRows(rk));
+
// Apply the column permutation
- dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
+ // colsPermutation() performs a copy of the permutation,
+ // so let's apply it manually:
+ for(Index i = 0; i < rk; ++i) dest.row(m_E[i]) = y.row(i);
+ for(Index i = rk; i < cols(); ++i) dest.row(m_E[i]).setZero();
+
+// y.bottomRows(y.rows()-rk).setZero();
+// dest = colsPermutation() * y.topRows(cols());
m_info = Success;
}
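Since SPQR now inherits from SparseSolverBase, the public solve() entry point comes from the base class and forwards to the _solve_impl above. A hedged usage sketch, assuming SuiteSparseQR is installed (matrix contents illustrative):

    #include <Eigen/SPQRSupport>
    using namespace Eigen;

    SparseMatrix<double> A(5, 3);
    A.insert(0, 0) = 2.0; A.insert(1, 1) = 3.0; A.insert(2, 2) = 4.0;
    A.insert(3, 0) = 1.0; A.insert(4, 2) = 1.0;
    A.makeCompressed();
    VectorXd b = VectorXd::Ones(5);

    SPQR<SparseMatrix<double> > spqr(A);   // explicit constructor, factorizes A
    VectorXd x = spqr.solve(b);            // least-squares solve via _solve_impl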
@@ -179,11 +188,7 @@ class SPQR
PermutationType colsPermutation() const
{
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
- Index n = m_cR->ncol;
- PermutationType colsPerm(n);
- for(Index j = 0; j <n; j++) colsPerm.indices()(j) = m_E[j];
- return colsPerm;
-
+ return PermutationType(m_E, m_cR->ncol);
}
/**
* Gets the rank of the matrix.
@@ -197,7 +202,11 @@ class SPQR
/// Set the fill-reducing ordering method to be used
void setSPQROrdering(int ord) { m_ordering = ord;}
/// Set the tolerance tol to treat columns with 2-norm <= tol as zero
- void setPivotThreshold(const RealScalar& tol) { m_tolerance = tol; }
+ void setPivotThreshold(const RealScalar& tol)
+ {
+ m_useDefaultThreshold = false;
+ m_tolerance = tol;
+ }
/** \returns a pointer to the SPQR workspace */
cholmod_common *cholmodCommon() const { return &m_cc; }
@@ -214,7 +223,6 @@ class SPQR
return m_info;
}
protected:
- bool m_isInitialized;
bool m_analysisIsOk;
bool m_factorizationIsOk;
mutable bool m_isRUpToDate;
@@ -230,6 +238,7 @@ class SPQR
mutable cholmod_dense *m_HTau; // The Householder coefficients
mutable Index m_rank; // The rank of the matrix
mutable cholmod_common m_cc; // Workspace and parameters
+ bool m_useDefaultThreshold; // Use default threshold
template<typename ,typename > friend struct SPQR_QProduct;
};
@@ -293,22 +302,5 @@ struct SPQRMatrixQTransposeReturnType{
const SPQRType& m_spqr;
};
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<SPQR<_MatrixType>, Rhs>
- : solve_retval_base<SPQR<_MatrixType>, Rhs>
-{
- typedef SPQR<_MatrixType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
-
}// End namespace Eigen
#endif
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
new file mode 100644
index 000000000..dad59bcca
--- /dev/null
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -0,0 +1,1172 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// We used the research report "A Divide-And-Conquer Algorithm for the
+// Bidiagonal SVD" written by Ming Gu and Stanley C. Eisenstat.
+// The variable names in the code correspond to the names used in their
+// report.
+//
+// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
+// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
+// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
+// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
+// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BDCSVD_H
+#define EIGEN_BDCSVD_H
+// #define EIGEN_BDCSVD_DEBUG_VERBOSE
+// #define EIGEN_BDCSVD_SANITY_CHECKS
+namespace Eigen {
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+IOFormat bdcsvdfmt(8, 0, ", ", "\n", " [", "]");
+#endif
+
+template<typename _MatrixType> class BDCSVD;
+
+namespace internal {
+
+template<typename _MatrixType>
+struct traits<BDCSVD<_MatrixType> >
+{
+ typedef _MatrixType MatrixType;
+};
+
+} // end namespace internal
+
+
+/** \ingroup SVD_Module
+ *
+ *
+ * \class BDCSVD
+ *
+ * \brief class Bidiagonal Divide and Conquer SVD
+ *
+ * \param MatrixType the type of the matrix of which we are computing the SVD decomposition
+ * We plan to have an interface for this class very similar to that of JacobiSVD.
+ * It should be used to speed up the computation of the SVD for large matrices.
+ */
+template<typename _MatrixType>
+class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
+{
+ typedef SVDBase<BDCSVD> Base;
+
+public:
+ using Base::rows;
+ using Base::cols;
+ using Base::computeU;
+ using Base::computeV;
+
+ typedef _MatrixType MatrixType;
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+ typedef typename MatrixType::Index Index;
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+ DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime),
+ MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+ MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime),
+ MatrixOptions = MatrixType::Options
+ };
+
+ typedef typename Base::MatrixUType MatrixUType;
+ typedef typename Base::MatrixVType MatrixVType;
+ typedef typename Base::SingularValuesType SingularValuesType;
+
+ typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX;
+ typedef Matrix<RealScalar, Dynamic, Dynamic> MatrixXr;
+ typedef Matrix<RealScalar, Dynamic, 1> VectorType;
+ typedef Array<RealScalar, Dynamic, 1> ArrayXr;
+ typedef Array<Index,1,Dynamic> ArrayXi;
+
+ /** \brief Default Constructor.
+ *
+ * The default constructor is useful in cases in which the user intends to
+ * perform decompositions via BDCSVD::compute(const MatrixType&).
+ */
+ BDCSVD() : m_algoswap(16), m_numIters(0)
+ {}
+
+
+ /** \brief Default Constructor with memory preallocation
+ *
+ * Like the default constructor but with preallocation of the internal data
+ * according to the specified problem size.
+ * \sa BDCSVD()
+ */
+ BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
+ : m_algoswap(16), m_numIters(0)
+ {
+ allocate(rows, cols, computationOptions);
+ }
+
+ /** \brief Constructor performing the decomposition of given matrix.
+ *
+ * \param matrix the matrix to decompose
+ * \param computationOptions optional parameter allowing one to specify whether you want full or thin U or V unitaries to be computed.
+ * By default, none is computed. This is a bit-field; the possible bits are #ComputeFullU, #ComputeThinU,
+ * #ComputeFullV, #ComputeThinV.
+ *
+ * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They are also not
+ * available with the (non-default) FullPivHouseholderQR preconditioner.
+ */
+ BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
+ : m_algoswap(16), m_numIters(0)
+ {
+ compute(matrix, computationOptions);
+ }
+
+ ~BDCSVD()
+ {
+ }
+
+ /** \brief Method performing the decomposition of given matrix using custom options.
+ *
+ * \param matrix the matrix to decompose
+ * \param computationOptions optional parameter allowing one to specify whether you want full or thin U or V unitaries to be computed.
+ * By default, none is computed. This is a bit-field; the possible bits are #ComputeFullU, #ComputeThinU,
+ * #ComputeFullV, #ComputeThinV.
+ *
+ * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They are also not
+ * available with the (non-default) FullPivHouseholderQR preconditioner.
+ */
+ BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);
+
+ /** \brief Method performing the decomposition of given matrix using current options.
+ *
+ * \param matrix the matrix to decompose
+ *
+ * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
+ */
+ BDCSVD& compute(const MatrixType& matrix)
+ {
+ return compute(matrix, this->m_computationOptions);
+ }
+
+ void setSwitchSize(int s)
+ {
+ eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3");
+ m_algoswap = s;
+ }
+
+private:
+ void allocate(Index rows, Index cols, unsigned int computationOptions);
+ void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
+ void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
+ void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus);
+ void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat);
+ void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V);
+ void deflation43(Index firstCol, Index shift, Index i, Index size);
+ void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
+ void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
+ template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
+ void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
+ static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
+ static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift);
+
+protected:
+ MatrixXr m_naiveU, m_naiveV;
+ MatrixXr m_computed;
+ Index m_nRec;
+ int m_algoswap;
+ bool m_isTranspose, m_compU, m_compV;
+
+ using Base::m_singularValues;
+ using Base::m_diagSize;
+ using Base::m_computeFullU;
+ using Base::m_computeFullV;
+ using Base::m_computeThinU;
+ using Base::m_computeThinV;
+ using Base::m_matrixU;
+ using Base::m_matrixV;
+ using Base::m_isInitialized;
+ using Base::m_nonzeroSingularValues;
+
+public:
+ int m_numIters;
+}; //end class BDCSVD
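A hedged usage sketch of the new class, mirroring the JacobiSVD-style interface declared above and assuming the Eigen/SVD module header pulls in BDCSVD.h (sizes and options illustrative):

    #include <Eigen/SVD>
    using namespace Eigen;

    MatrixXd A = MatrixXd::Random(1000, 300);
    BDCSVD<MatrixXd> svd(A, ComputeThinU | ComputeThinV);
    VectorXd sv = svd.singularValues();   // sorted in decreasing order
    MatrixXd U  = svd.matrixU();          // 1000 x 300 (thin)
    MatrixXd V  = svd.matrixV();          // 300 x 300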
+
+
+// Method to allocate and initialize matrix and attributes
+template<typename MatrixType>
+void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
+{
+ m_isTranspose = (cols > rows);
+
+ if (Base::allocate(rows, cols, computationOptions))
+ return;
+
+ m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );
+ m_compU = computeV();
+ m_compV = computeU();
+ if (m_isTranspose)
+ std::swap(m_compU, m_compV);
+
+ if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );
+ else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
+
+ if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
+}// end allocate
+
+template<typename MatrixType>
+BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions)
+{
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "\n\n\n======================================================================================================================\n\n\n";
+#endif
+ allocate(matrix.rows(), matrix.cols(), computationOptions);
+ using std::abs;
+
+ //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
+ RealScalar scale = matrix.cwiseAbs().maxCoeff();
+ if(scale==RealScalar(0)) scale = RealScalar(1);
+ MatrixX copy;
+ if (m_isTranspose) copy = matrix.adjoint()/scale;
+ else copy = matrix/scale;
+
+ //**** step 1 - Bidiagonalization
+ internal::UpperBidiagonalization<MatrixX> bid(copy);
+
+ //**** step 2 - Divide & Conquer
+ m_naiveU.setZero();
+ m_naiveV.setZero();
+ m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
+ m_computed.template bottomRows<1>().setZero();
+ divide(0, m_diagSize - 1, 0, 0, 0);
+
+ //**** step 3 - Copy singular values and vectors
+ for (int i=0; i<m_diagSize; i++)
+ {
+ RealScalar a = abs(m_computed.coeff(i, i));
+ m_singularValues.coeffRef(i) = a * scale;
+ if (a == 0)
+ {
+ m_nonzeroSingularValues = i;
+ m_singularValues.tail(m_diagSize - i - 1).setZero();
+ break;
+ }
+ else if (i == m_diagSize - 1)
+ {
+ m_nonzeroSingularValues = i + 1;
+ break;
+ }
+ }
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+// std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
+// std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
+#endif
+ if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);
+ else copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);
+
+ m_isInitialized = true;
+ return *this;
+}// end compute
+
+
+template<typename MatrixType>
+template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
+void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)
+{
+ // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
+ if (computeU())
+ {
+ Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
+ m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
+ m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
+ householderU.applyThisOnTheLeft(m_matrixU);
+ }
+ if (computeV())
+ {
+ Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
+ m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
+ m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
+ householderV.applyThisOnTheLeft(m_matrixV);
+ }
+}
+
+/** \internal
+ * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:
+ * A = [A1]
+ * [A2]
+ * such that A1.rows()==n1, we assume that at least half of the columns of A1 and A2 are zero.
+ * We can thus pack them prior to the matrix product. However, this is only worth the effort if the matrix is large
+ * enough.
+ */
+template<typename MatrixType>
+void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)
+{
+ Index n = A.rows();
+ if(n>100)
+ {
+ // If the matrices are large enough, let's exploit the sparse structure of A by
+ // splitting it in half (wrt n1), and packing the non-zero columns.
+ DenseIndex n2 = n - n1;
+ MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n);
+ Index k1=0, k2=0;
+ for(Index j=0; j<n; ++j)
+ {
+ if( (A.col(j).head(n1).array()!=0).any() )
+ {
+ A1.col(k1) = A.col(j).head(n1);
+ B1.row(k1) = B.row(j);
+ ++k1;
+ }
+ if( (A.col(j).tail(n2).array()!=0).any() )
+ {
+ A2.col(k2) = A.col(j).tail(n2);
+ B2.row(k2) = B.row(j);
+ ++k2;
+ }
+ }
+
+ A.topRows(n1).noalias() = A1.leftCols(k1) * B1.topRows(k1);
+ A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
+ }
+ else
+ A *= B; // FIXME this requires a temporary
+}
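The packing step relies on the identity that zero columns of A contribute nothing to A*B, so A*B equals A(:,S)*B(S,:) for the index set S of nonzero columns of A. A minimal standalone check of that identity (purely illustrative, not part of the patch):

    #include <Eigen/Dense>
    #include <cassert>
    using namespace Eigen;

    int main()
    {
      MatrixXd A = MatrixXd::Random(8, 6);
      A.col(1).setZero();
      A.col(4).setZero();                     // fake a sparse column structure
      MatrixXd B = MatrixXd::Random(6, 6);

      MatrixXd packedA(8, 6), packedB(6, 6);
      int k = 0;
      for (int j = 0; j < A.cols(); ++j)
        if ((A.col(j).array() != 0).any())    // keep only nonzero columns of A...
        {
          packedA.col(k) = A.col(j);
          packedB.row(k) = B.row(j);          // ...and the matching rows of B
          ++k;
        }
      // Dropping zero columns of A and the matching rows of B leaves the product unchanged:
      assert((A * B - packedA.leftCols(k) * packedB.topRows(k)).norm() < 1e-12);
      return 0;
    }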
+
+// The divide algorithm works "in place": we are always working on subsets of the same matrix. The divide method takes as
+// arguments the location of the submatrix we are currently working on.
+
+//@param firstCol : The Index of the first column of the submatrix of m_computed and for m_naiveU;
+//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU;
+// lastCol + 1 - firstCol is the size of the submatrix.
+//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W)
+//@param firstColW : Same as firstRowW, but for the columns of W.
+//@param shift : Each time one takes the left submatrix, one must add 1 to the shift, because we want the last column of the U submatrix
+// to become the first column (*coeff) and to shift all the other columns to the right. More details can be found in the reference paper.
+template<typename MatrixType>
+void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift)
+{
+ // requires nbRows = nbCols + 1;
+ using std::pow;
+ using std::sqrt;
+ using std::abs;
+ const Index n = lastCol - firstCol + 1;
+ const Index k = n/2;
+ RealScalar alphaK;
+ RealScalar betaK;
+ RealScalar r0;
+ RealScalar lambda, phi, c0, s0;
+ VectorType l, f;
+ // For small blocks we use JacobiSVD directly, which is more efficient
+ // than the divide and conquer recursion for small matrices.
+ if (n < m_algoswap)
+ {
+ JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ;
+ if (m_compU)
+ m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
+ else
+ {
+ m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
+ m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
+ }
+ if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();
+ m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
+ m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);
+ return;
+ }
+ // We use the divide and conquer algorithm
+ alphaK = m_computed(firstCol + k, firstCol + k);
+ betaK = m_computed(firstCol + k + 1, firstCol + k);
+ // The divides must be done in this order to get correct results: divide changes the data inside the submatrices,
+ // and the divide of the right submatrix reads one column of the left submatrix. That is why we need to treat the
+ // right submatrix before the left one.
+ divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
+ divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);
+
+ if (m_compU)
+ {
+ lambda = m_naiveU(firstCol + k, firstCol + k);
+ phi = m_naiveU(firstCol + k + 1, lastCol + 1);
+ }
+ else
+ {
+ lambda = m_naiveU(1, firstCol + k);
+ phi = m_naiveU(0, lastCol + 1);
+ }
+ r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));
+ if (m_compU)
+ {
+ l = m_naiveU.row(firstCol + k).segment(firstCol, k);
+ f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
+ }
+ else
+ {
+ l = m_naiveU.row(1).segment(firstCol, k);
+ f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
+ }
+ if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1;
+ if (r0 == 0)
+ {
+ c0 = 1;
+ s0 = 0;
+ }
+ else
+ {
+ c0 = alphaK * lambda / r0;
+ s0 = betaK * phi / r0;
+ }
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+
+ if (m_compU)
+ {
+ MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));
+ // we shiftW Q1 to the right
+ for (Index i = firstCol + k - 1; i >= firstCol; i--)
+ m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);
+ // we shift q1 at the left with a factor c0
+ m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0);
+ // last column = q1 * - s0
+ m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0));
+ // first column = q2 * s0
+ m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0;
+ // q2 *= c0
+ m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
+ }
+ else
+ {
+ RealScalar q1 = (m_naiveU(0, firstCol + k));
+ // we shift Q1 to the right
+ for (Index i = firstCol + k - 1; i >= firstCol; i--)
+ m_naiveU(0, i + 1) = m_naiveU(0, i);
+ // we shift q1 at the left with a factor c0
+ m_naiveU(0, firstCol) = (q1 * c0);
+ // last column = q1 * - s0
+ m_naiveU(0, lastCol + 1) = (q1 * ( - s0));
+ // first column = q2 * s0
+ m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0;
+ // q2 *= c0
+ m_naiveU(1, lastCol + 1) *= c0;
+ m_naiveU.row(1).segment(firstCol + 1, k).setZero();
+ m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
+ }
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+
+ m_computed(firstCol + shift, firstCol + shift) = r0;
+ m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();
+ m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
+#endif
+ // Second part: try to deflate singular values in combined matrix
+ deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
+ std::cout << "\n\nj1 = " << tmp1.transpose().format(bdcsvdfmt) << "\n";
+ std::cout << "j2 = " << tmp2.transpose().format(bdcsvdfmt) << "\n\n";
+ std::cout << "err: " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n";
+ static int count = 0;
+ std::cout << "# " << ++count << "\n\n";
+ assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
+// assert(count<681);
+// assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
+#endif
+
+ // Third part: compute SVD of combined matrix
+ MatrixXr UofSVD, VofSVD;
+ VectorType singVals;
+ computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(UofSVD.allFinite());
+ assert(VofSVD.allFinite());
+#endif
+
+ if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
+ else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time
+
+ if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+
+ m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
+ m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
+}// end divide
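The combination step in the middle of divide() is a Givens-style normalization of the two coupling coefficients: r0 = sqrt((alphaK*lambda)^2 + (betaK*phi)^2), c0 = alphaK*lambda / r0, s0 = betaK*phi / r0, so that c0^2 + s0^2 = 1. As a worked check with assumed values alphaK*lambda = 3 and betaK*phi = 4: r0 = 5, c0 = 0.6 and s0 = 0.8.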
+
+// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
+// the first column and on the diagonal and has undergone deflation, so diagonal is in increasing
+// order except for possibly the (0,0) entry. The computed SVD is stored in U, singVals and V, except
+// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
+//
+// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
+// handling of round-off errors, be consistent in ordering
+// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
+template <typename MatrixType>
+void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
+{
+ // TODO Get rid of these copies (?)
+ // FIXME at least preallocate them
+ ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n);
+ ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal();
+ diag(0) = 0;
+
+ // Allocate space for singular values and vectors
+ singVals.resize(n);
+ U.resize(n+1, n+1);
+ if (m_compV) V.resize(n, n);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ if (col0.hasNaN() || diag.hasNaN())
+ std::cout << "\n\nHAS NAN\n\n";
+#endif
+
+ // Many singular values might have been deflated: the zero ones have been moved to the end,
+ // but others are interleaved and we must ignore them at this stage.
+ // To this end, let's compute a permutation skipping them:
+ Index actual_n = n;
+ while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
+ Index m = 0; // size of the deflated problem
+ ArrayXi perm(actual_n);
+ for(Index k=0;k<actual_n;++k)
+ if(col0(k)!=0)
+ perm(m++) = k;
+ perm.conservativeResize(m);
+
+ ArrayXr shifts(n), mus(n), zhat(n);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "computeSVDofM using:\n";
+ std::cout << " z: " << col0.transpose() << "\n";
+ std::cout << " d: " << diag.transpose() << "\n";
+#endif
+
+ // Compute singVals, shifts, and mus
+ computeSingVals(col0, diag, perm, singVals, shifts, mus);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << " j: " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n";
+ std::cout << " sing-val: " << singVals.transpose() << "\n";
+ std::cout << " mu: " << mus.transpose() << "\n";
+ std::cout << " shift: " << shifts.transpose() << "\n";
+
+ {
+ Index actual_n = n;
+ while(actual_n>1 && col0(actual_n-1)==0) --actual_n;
+ std::cout << "\n\n mus: " << mus.head(actual_n).transpose() << "\n\n";
+ std::cout << " check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
+ std::cout << " check2 (>0) : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
+ std::cout << " check3 (>0) : " << ((diag.segment(1,actual_n-1)-singVals.head(actual_n-1).array()) / singVals.head(actual_n-1).array()).transpose() << "\n\n\n";
+ std::cout << " check4 (>0) : " << ((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).transpose() << "\n\n\n";
+ }
+#endif
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(singVals.allFinite());
+ assert(mus.allFinite());
+ assert(shifts.allFinite());
+#endif
+
+ // Compute zhat
+ perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat);
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << " zhat: " << zhat.transpose() << "\n";
+#endif
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(zhat.allFinite());
+#endif
+
+ computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n";
+ std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n";
+#endif
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(U.allFinite());
+ assert(V.allFinite());
+ assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 1e-14 * n);
+ assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 1e-14 * n);
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+
+ // Because of deflation, the singular values might not be completely sorted.
+ // Fortunately, reordering them is an O(n) problem.
+ for(Index i=0; i<actual_n-1; ++i)
+ {
+ if(singVals(i)>singVals(i+1))
+ {
+ using std::swap;
+ swap(singVals(i),singVals(i+1));
+ U.col(i).swap(U.col(i+1));
+ if(m_compV) V.col(i).swap(V.col(i+1));
+ }
+ }
+
+ // Reverse the order so that the singular values are sorted in decreasing order.
+ // Because of deflation, the zero singular values are already at the end.
+ singVals.head(actual_n).reverseInPlace();
+ U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
+ if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
+ std::cout << " * j: " << jsvd.singularValues().transpose() << "\n\n";
+ std::cout << " * sing-val: " << singVals.transpose() << "\n";
+// std::cout << " * err: " << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << "\n";
+#endif
+}
+
+template <typename MatrixType>
+typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift)
+{
+ Index m = perm.size();
+ RealScalar res = 1;
+ for(Index i=0; i<m; ++i)
+ {
+ Index j = perm(i);
+ res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu));
+ }
+ return res;
+}
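In the notation of the Gu/Eisenstat report, secularEq() evaluates the secular function f(sigma) = 1 + sum_j z_j^2 / (d_j^2 - sigma^2) at sigma = shift + mu; the singular values of the combined problem are exactly the roots of f. Each denominator is expanded in the cancellation-free form d_j^2 - sigma^2 = (diagShifted(j) - mu) * (diag(j) + shift + mu), which is precisely the product appearing in the loop above.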
+
+template <typename MatrixType>
+void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm,
+ VectorType& singVals, ArrayXr& shifts, ArrayXr& mus)
+{
+ using std::abs;
+ using std::swap;
+
+ Index n = col0.size();
+ Index actual_n = n;
+ while(actual_n>1 && col0(actual_n-1)==0) --actual_n;
+
+ for (Index k = 0; k < n; ++k)
+ {
+ if (col0(k) == 0 || actual_n==1)
+ {
+ // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
+ // if actual_n==1, then the deflated problem is already diagonalized
+ singVals(k) = k==0 ? col0(0) : diag(k);
+ mus(k) = 0;
+ shifts(k) = k==0 ? col0(0) : diag(k);
+ continue;
+ }
+
+ // otherwise, use secular equation to find singular value
+ RealScalar left = diag(k);
+ RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm());
+ if(k==actual_n-1)
+ right = (diag(actual_n-1) + col0.matrix().norm());
+ else
+ {
+ // Skip deflated singular values
+ Index l = k+1;
+ while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); }
+ right = diag(l);
+ }
+
+ // first decide whether it's closer to the left end or the right end
+ RealScalar mid = left + (right-left) / 2;
+ RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0);
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << right-left << "\n";
+ std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n";
+ std::cout << " = " << secularEq(0.1*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.2*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.3*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.4*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.49*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.5*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.51*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.6*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.7*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
+ << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
+#endif
+ RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right;
+
+ // measure everything relative to shift
+ ArrayXr diagShifted = diag - shift;
+
+ // initial guess
+ RealScalar muPrev, muCur;
+ if (shift == left)
+ {
+ muPrev = (right - left) * 0.1;
+ if (k == actual_n-1) muCur = right - left;
+ else muCur = (right - left) * 0.5;
+ }
+ else
+ {
+ muPrev = -(right - left) * 0.1;
+ muCur = -(right - left) * 0.5;
+ }
+
+ RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);
+ RealScalar fCur = secularEq(muCur, col0, diag, perm, diagShifted, shift);
+ if (abs(fPrev) < abs(fCur))
+ {
+ swap(fPrev, fCur);
+ swap(muPrev, muCur);
+ }
+
+ // rational interpolation: fit a function of the form a / mu + b through the two previous
+ // iterates and use its zero to compute the next iterate
+ bool useBisection = fPrev*fCur>0;
+ while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
+ {
+ ++m_numIters;
+
+ // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
+ RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev);
+ RealScalar b = fCur - a / muCur;
+ // And find mu such that f(mu)==0:
+ RealScalar muZero = -a/b;
+ RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);
+
+ muPrev = muCur;
+ fPrev = fCur;
+ muCur = muZero;
+ fCur = fZero;
+
+
+ if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true;
+ if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true;
+ if (abs(fCur)>abs(fPrev)) useBisection = true;
+ }
+
+ // fall back on bisection method if rational interpolation did not work
+ if (useBisection)
+ {
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n";
+#endif
+ RealScalar leftShifted, rightShifted;
+ if (shift == left)
+ {
+ leftShifted = RealScalar(1)/NumTraits<RealScalar>::highest();
+ // I don't understand why the case k==0 would be special there:
+ // if (k == 0) rightShifted = right - left; else
+ rightShifted = (k==actual_n-1) ? right : ((right - left) * 0.6); // theoretically we can take 0.5, but let's be safe
+ }
+ else
+ {
+ leftShifted = -(right - left) * 0.6;
+ rightShifted = -RealScalar(1)/NumTraits<RealScalar>::highest();
+ }
+
+ RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
+ RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ if(!(fLeft * fRight<0))
+ std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n";
+#endif
+ eigen_internal_assert(fLeft * fRight < 0);
+
+ while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted)))
+ {
+ RealScalar midShifted = (leftShifted + rightShifted) / 2;
+ RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
+ if (fLeft * fMid < 0)
+ {
+ rightShifted = midShifted;
+ fRight = fMid;
+ }
+ else
+ {
+ leftShifted = midShifted;
+ fLeft = fMid;
+ }
+ }
+
+ muCur = (leftShifted + rightShifted) / 2;
+ }
+
+ singVals[k] = shift + muCur;
+ shifts[k] = shift;
+ mus[k] = muCur;
+
+    // perturb the singular value slightly if it equals a diagonal entry, to avoid a division by zero later
+    // (deflation is supposed to prevent this from happening)
+    // - this does not seem to be necessary anymore -
+// if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();
+// if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();
+ }
+}
+
+
+// zhat is a perturbation of col0 for which the singular vectors can be computed stably (see Section 3.1)
+template <typename MatrixType>
+void BDCSVD<MatrixType>::perturbCol0
+ (const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
+ const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat)
+{
+ using std::sqrt;
+ Index n = col0.size();
+ Index m = perm.size();
+ if(m==0)
+ {
+ zhat.setZero();
+ return;
+ }
+ Index last = perm(m-1);
+  // The offset permits skipping deflated entries while computing zhat
+ for (Index k = 0; k < n; ++k)
+ {
+ if (col0(k) == 0) // deflated
+ zhat(k) = 0;
+ else
+ {
+ // see equation (3.6)
+ RealScalar dk = diag(k);
+ RealScalar prod = (singVals(last) + dk) * (mus(last) + (shifts(last) - dk));
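+      // since singVals(j) == shifts(j) + mus(j), each factor of the form
+      // (singVals(j)+dk)*(mus(j)+(shifts(j)-dk)) equals singVals(j)^2 - dk^2,
+      // evaluated in a factored form that avoids cancellation when singVals(j) ~ dk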
+
+ for(Index l = 0; l<m; ++l)
+ {
+ Index i = perm(l);
+ if(i!=k)
+ {
+ Index j = i<k ? i : perm(l-1);
+ prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ if(i!=k && std::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
+ std::cout << " " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk))
+ << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n";
+#endif
+ }
+ }
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
+#endif
+ RealScalar tmp = sqrt(prod);
+ zhat(k) = col0(k) > 0 ? tmp : -tmp;
+ }
+ }
+}
+
+// compute singular vectors
+template <typename MatrixType>
+void BDCSVD<MatrixType>::computeSingVecs
+ (const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
+ const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V)
+{
+ Index n = zhat.size();
+ Index m = perm.size();
+
+ for (Index k = 0; k < n; ++k)
+ {
+ if (zhat(k) == 0)
+ {
+ U.col(k) = VectorType::Unit(n+1, k);
+ if (m_compV) V.col(k) = VectorType::Unit(n, k);
+ }
+ else
+ {
+ U.col(k).setZero();
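+      // the denominator below is the stable factorization of diag(i)^2 - singVals(k)^2,
+      // i.e. (diag(i) - shifts(k) - mus(k)) * (diag(i) + singVals(k))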
+ for(Index l=0;l<m;++l)
+ {
+ Index i = perm(l);
+ U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
+ }
+ U(n,k) = 0;
+ U.col(k).normalize();
+
+ if (m_compV)
+ {
+ V.col(k).setZero();
+ for(Index l=1;l<m;++l)
+ {
+ Index i = perm(l);
+ V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
+ }
+ V(0,k) = -1;
+ V.col(k).normalize();
+ }
+ }
+ }
+ U.col(n) = VectorType::Unit(n+1, n);
+}
+
+
+// pages 12-13
+// i >= 1, di almost zero and zi non-zero.
+// We use a rotation, applied to the left of M, to zero out zi
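+// (for hypothetical values c = 3, s = 4 we get r = 5, and the plane rotation built
+// from c/r and s/r maps the pair (c, s) to (r, 0), i.e. it annihilates the z entry)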
+template <typename MatrixType>
+void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size)
+{
+ using std::abs;
+ using std::sqrt;
+ using std::pow;
+ Index start = firstCol + shift;
+ RealScalar c = m_computed(start, start);
+ RealScalar s = m_computed(start+i, start);
+ RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
+ if (r == 0)
+ {
+ m_computed(start+i, start+i) = 0;
+ return;
+ }
+ m_computed(start,start) = r;
+ m_computed(start+i, start) = 0;
+ m_computed(start+i, start+i) = 0;
+
+ JacobiRotation<RealScalar> J(c/r,-s/r);
+ if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
+ else m_naiveU.applyOnTheRight(firstCol, firstCol+i, J);
+}// end deflation 43
+
+
+// page 13
+// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
+// We apply two rotations so that zj == 0
+// TODO deflation44 is still broken and not properly tested
+template <typename MatrixType>
+void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
+{
+ using std::abs;
+ using std::sqrt;
+ using std::conj;
+ using std::pow;
+ RealScalar c = m_computed(firstColm+i, firstColm);
+ RealScalar s = m_computed(firstColm+j, firstColm);
+ RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; "
+ << m_computed(firstColm + i-1, firstColm) << " "
+ << m_computed(firstColm + i, firstColm) << " "
+ << m_computed(firstColm + i+1, firstColm) << " "
+ << m_computed(firstColm + i+2, firstColm) << "\n";
+ std::cout << m_computed(firstColm + i-1, firstColm + i-1) << " "
+ << m_computed(firstColm + i, firstColm+i) << " "
+ << m_computed(firstColm + i+1, firstColm+i+1) << " "
+ << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
+#endif
+ if (r==0)
+ {
+ m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
+ return;
+ }
+ c/=r;
+ s/=r;
+ m_computed(firstColm + i, firstColm) = r;
+ m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
+ m_computed(firstColm + j, firstColm) = 0;
+
+ JacobiRotation<RealScalar> J(c,-s);
+ if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
+ else m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J);
+ if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);
+}// end deflation 44
+
+
+// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
+template <typename MatrixType>
+void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift)
+{
+ using std::sqrt;
+ using std::abs;
+ const Index length = lastCol + 1 - firstCol;
+
+ Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1);
+ Diagonal<MatrixXr> fulldiag(m_computed);
+ VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);
+
+ RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
+ RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag;
+ RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag);
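+  // epsilon_strict is the threshold below which small entries of z are zeroed (condition 4.2)
+  // and below which nearly-equal diagonal entries are merged (condition 4.4);
+  // epsilon_coarse handles small diagonal entries (conditions 4.1 and 4.3)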
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "\ndeflate:" << diag.head(k+1).transpose() << " | " << diag.segment(k+1,length-k-1).transpose() << "\n";
+#endif
+
+ //condition 4.1
+ if (diag(0) < epsilon_coarse)
+ {
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n";
+#endif
+ diag(0) = epsilon_coarse;
+ }
+
+ //condition 4.2
+ for (Index i=1;i<length;++i)
+ if (abs(col0(i)) < epsilon_strict)
+ {
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n";
+#endif
+ col0(i) = 0;
+ }
+
+ //condition 4.3
+ for (Index i=1;i<length; i++)
+ if (diag(i) < epsilon_coarse)
+ {
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "deflation 4.3, cancel z(" << i << ")=" << col0(i) << " because diag(" << i << ")=" << diag(i) << " < " << epsilon_coarse << "\n";
+#endif
+ deflation43(firstCol, shift, i, length);
+ }
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "to be sorted: " << diag.transpose() << "\n\n";
+#endif
+ {
+ // Check for total deflation
+ // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting
+ bool total_deflation = (col0.tail(length-1).array()==RealScalar(0)).all();
+
+ // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
+ // First, compute the respective permutation.
+ Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation
+ {
+ permutation[0] = 0;
+ Index p = 1;
+
+      // Move the deflated diagonal entries to the end.
+ for(Index i=1; i<length; ++i)
+ if(diag(i)==0)
+ permutation[p++] = i;
+
+ Index i=1, j=k+1;
+ for( ; p < length; ++p)
+ {
+ if (i > k) permutation[p] = j++;
+ else if (j >= length) permutation[p] = i++;
+ else if (diag(i) < diag(j)) permutation[p] = j++;
+ else permutation[p] = i++;
+ }
+ }
+
+ // If we have a total deflation, then we have to insert diag(0) at the right place
+ if(total_deflation)
+ {
+ for(Index i=1; i<length; ++i)
+ {
+ Index pi = permutation[i];
+ if(diag(pi)==0 || diag(0)<diag(pi))
+ permutation[i-1] = permutation[i];
+ else
+ {
+ permutation[i-1] = 0;
+ break;
+ }
+ }
+ }
+
+ // Current index of each col, and current column of each index
+ Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation
+ Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation
+
+    for(Index pos = 0; pos < length; pos++)
+ {
+ realCol[pos] = pos;
+ realInd[pos] = pos;
+ }
+
+ for(Index i = total_deflation?0:1; i < length; i++)
+ {
+ const Index pi = permutation[length - (total_deflation ? i+1 : i)];
+ const Index J = realCol[pi];
+
+ using std::swap;
+ // swap diagonal and first column entries:
+ swap(diag(i), diag(J));
+ if(i!=0 && J!=0) swap(col0(i), col0(J));
+
+ // change columns
+ if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1));
+ else m_naiveU.col(firstCol+i).segment(0, 2) .swap(m_naiveU.col(firstCol+J).segment(0, 2));
+ if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length));
+
+      // update real positions
+ const Index realI = realInd[i];
+ realCol[realI] = J;
+ realCol[pi] = i;
+ realInd[J] = realI;
+ realInd[i] = pi;
+ }
+ delete[] permutation;
+ delete[] realInd;
+ delete[] realCol;
+ }
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+ std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
+ std::cout << " : " << col0.transpose() << "\n\n";
+#endif
+
+ //condition 4.4
+ {
+ Index i = length-1;
+ while(i>0 && (diag(i)==0 || col0(i)==0)) --i;
+ for(; i>1;--i)
+ if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )
+ {
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+        std::cout << "deflation 4.4 with i = " << i << " because " << (diag(i) - diag(i-1)) << " < " << NumTraits<RealScalar>::epsilon()*maxDiag << "\n";
+#endif
+ eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && " diagonal entries are not properly sorted");
+ deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length);
+ }
+ }
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ for(Index j=2;j<length;++j)
+ assert(diag(j-1)<=diag(j) || diag(j)==0);
+#endif
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+ assert(m_naiveU.allFinite());
+ assert(m_naiveV.allFinite());
+ assert(m_computed.allFinite());
+#endif
+}//end deflation
+
+#ifndef __CUDACC__
+/** \svd_module
+ *
+  * \return the singular value decomposition of \c *this computed by the Divide & Conquer algorithm
+ *
+ * \sa class BDCSVD
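+  *
+  * A typical use (sketch, with hypothetical inputs \c A and \c b):
+  * \code
+  * BDCSVD<MatrixXf> svd = A.bdcSvd(ComputeThinU | ComputeThinV);
+  * VectorXf x = svd.solve(b);
+  * \endcode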
+ */
+template<typename Derived>
+BDCSVD<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const
+{
+ return BDCSVD<PlainObject>(*this, computationOptions);
+}
+#endif
+
+} // end namespace Eigen
+
+#endif
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 3ab8a4c8a..444187ae7 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -550,7 +550,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
* according to the specified problem size.
* \sa JacobiSVD()
*/
- JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
+ explicit JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
{
allocate(rows, cols, computationOptions);
}
@@ -565,7 +565,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
* Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
* available with the (non-default) FullPivHouseholderQR preconditioner.
*/
- JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
+ explicit JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
{
compute(matrix, computationOptions);
}
@@ -593,27 +593,12 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
return compute(matrix, m_computationOptions);
}
- /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
- *
- * \param b the right-hand-side of the equation to solve.
- *
- * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
- *
- * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
- * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
- */
- template<typename Rhs>
- inline const internal::solve_retval<JacobiSVD, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
- eigen_assert(computeU() && computeV() && "JacobiSVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
- return internal::solve_retval<JacobiSVD, Rhs>(*this, b.derived());
- }
-
using Base::computeU;
using Base::computeV;
-
+ using Base::rows;
+ using Base::cols;
+ using Base::rank;
+
private:
void allocate(Index rows, Index cols, unsigned int computationOptions);
@@ -643,6 +628,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreColsThanRows> m_qr_precond_morecols;
internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols> m_qr_precond_morerows;
+ MatrixType m_scaledMatrix;
};
template<typename MatrixType, int QRPreconditioner>
@@ -689,8 +675,9 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u
: 0);
m_workMatrix.resize(m_diagSize, m_diagSize);
- if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this);
- if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this);
+ if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this);
+ if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this);
+  if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols);
}
template<typename MatrixType, int QRPreconditioner>
@@ -707,21 +694,26 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
// limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();
+ // Scaling factor to reduce over/under-flows
+ RealScalar scale = matrix.cwiseAbs().maxCoeff();
+ if(scale==RealScalar(0)) scale = RealScalar(1);
+
/*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
- if(!m_qr_precond_morecols.run(*this, matrix) && !m_qr_precond_morerows.run(*this, matrix))
+ if(m_rows!=m_cols)
+ {
+ m_scaledMatrix = matrix / scale;
+ m_qr_precond_morecols.run(*this, m_scaledMatrix);
+ m_qr_precond_morerows.run(*this, m_scaledMatrix);
+ }
+ else
{
- m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize);
+ m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale;
if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows);
if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize);
if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols);
if(m_computeThinV) m_matrixV.setIdentity(m_cols, m_diagSize);
}
-
- // Scaling factor to reduce over/under-flows
- RealScalar scale = m_workMatrix.cwiseAbs().maxCoeff();
- if(scale==RealScalar(0)) scale = RealScalar(1);
- m_workMatrix /= scale;
/*** step 2. The main Jacobi SVD iteration. ***/
@@ -739,8 +731,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
// if this 2x2 sub-matrix is not diagonal already...
// notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
// keep us iterating forever. Similarly, small denormal numbers are considered zero.
- EIGEN_USING_STD_MATH(max);
- RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)),
+ RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)),
abs(m_workMatrix.coeff(q,q))));
// We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
@@ -799,31 +790,6 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
return *this;
}
-namespace internal {
-template<typename _MatrixType, int QRPreconditioner, typename Rhs>
-struct solve_retval<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
- : solve_retval_base<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
-{
- typedef JacobiSVD<_MatrixType, QRPreconditioner> JacobiSVDType;
- EIGEN_MAKE_SOLVE_HELPERS(JacobiSVDType,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- eigen_assert(rhs().rows() == dec().rows());
-
- // A = U S V^*
- // So A^{-1} = V S^{-1} U^*
-
- Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime, 0, _MatrixType::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime> tmp;
- Index rank = dec().rank();
-
- tmp.noalias() = dec().matrixU().leftCols(rank).adjoint() * rhs();
- tmp = dec().singularValues().head(rank).asDiagonal().inverse() * tmp;
- dst = dec().matrixV().leftCols(rank) * tmp;
- }
-};
-} // end namespace internal
-
#ifndef __CUDACC__
/** \svd_module
*
diff --git a/Eigen/src/SVD/JacobiSVD_MKL.h b/Eigen/src/SVD/JacobiSVD_MKL.h
index decda7540..14e461c4e 100644
--- a/Eigen/src/SVD/JacobiSVD_MKL.h
+++ b/Eigen/src/SVD/JacobiSVD_MKL.h
@@ -45,8 +45,8 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) \
{ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
- typedef MatrixType::Scalar Scalar; \
- typedef MatrixType::RealScalar RealScalar; \
+ /*typedef MatrixType::Scalar Scalar;*/ \
+ /*typedef MatrixType::RealScalar RealScalar;*/ \
allocate(matrix.rows(), matrix.cols(), computationOptions); \
\
/*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index 61b01fb8a..27b732b80 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -190,6 +190,30 @@ public:
inline Index rows() const { return m_rows; }
inline Index cols() const { return m_cols; }
+
+ /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
+ *
+ * \param b the right-hand-side of the equation to solve.
+ *
+ * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
+ *
+ * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
+ * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
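+    *
+    * A minimal sketch (assuming hypothetical inputs \c A and \c b):
+    * \code
+    * JacobiSVD<MatrixXd> svd(A, ComputeThinU | ComputeThinV);
+    * VectorXd x = svd.solve(b);
+    * \endcode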
+ */
+ template<typename Rhs>
+ inline const Solve<Derived, Rhs>
+ solve(const MatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "SVD is not initialized.");
+ eigen_assert(computeU() && computeV() && "SVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
+ return Solve<Derived, Rhs>(derived(), b.derived());
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ void _solve_impl(const RhsType &rhs, DstType &dst) const;
+ #endif
protected:
// return true if already allocated
@@ -220,6 +244,23 @@ protected:
};
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename Derived>
+template<typename RhsType, typename DstType>
+void SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+ eigen_assert(rhs.rows() == rows());
+
+ // A = U S V^*
+ // So A^{-1} = V S^{-1} U^*
+
+ Matrix<Scalar, Dynamic, RhsType::ColsAtCompileTime, 0, MatrixType::MaxRowsAtCompileTime, RhsType::MaxColsAtCompileTime> tmp;
+ Index l_rank = rank();
+ tmp.noalias() = m_matrixU.leftCols(l_rank).adjoint() * rhs;
+ tmp = m_singularValues.head(l_rank).asDiagonal().inverse() * tmp;
+ dst = m_matrixV.leftCols(l_rank) * tmp;
+}
+#endif
template<typename MatrixType>
bool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h
index 64906bf0c..eaa6bb86e 100644
--- a/Eigen/src/SVD/UpperBidiagonalization.h
+++ b/Eigen/src/SVD/UpperBidiagonalization.h
@@ -37,7 +37,7 @@ template<typename _MatrixType> class UpperBidiagonalization
typedef Matrix<Scalar, ColsAtCompileTimeMinusOne, 1> SuperDiagVectorType;
typedef HouseholderSequence<
const MatrixType,
- CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Diagonal<const MatrixType,0> >
+ const typename internal::remove_all<typename Diagonal<const MatrixType,0>::ConjugateReturnType>::type
> HouseholderUSequenceType;
typedef HouseholderSequence<
const typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type,
@@ -53,7 +53,7 @@ template<typename _MatrixType> class UpperBidiagonalization
*/
UpperBidiagonalization() : m_householder(), m_bidiagonal(), m_isInitialized(false) {}
- UpperBidiagonalization(const MatrixType& matrix)
+ explicit UpperBidiagonalization(const MatrixType& matrix)
: m_householder(matrix.rows(), matrix.cols()),
m_bidiagonal(matrix.cols(), matrix.cols()),
m_isInitialized(false)
@@ -220,10 +220,10 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
}
- // 5 - construct right Householder transform in-placecols
+ // 5 - construct right Householder transform in-place
u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);
- // this eases the application of Householder transforAions
+ // this eases the application of Householder transformations
// A(k,k+1) will store tau_u later
A(k,k+1) = Scalar(1);
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index e1f96ba5a..22325d7f4 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -17,6 +17,27 @@ enum SimplicialCholeskyMode {
SimplicialCholeskyLDLT
};
+namespace internal {
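+  // Helper avoiding a useless copy of the input matrix: the generic version below
+  // copies 'input' into the temporary 'tmp' and points 'pmat' at it, while the
+  // specialization for matching types points 'pmat' directly at 'input'.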
+ template<typename CholMatrixType, typename InputMatrixType>
+ struct simplicial_cholesky_grab_input {
+ typedef CholMatrixType const * ConstCholMatrixPtr;
+ static void run(const InputMatrixType& input, ConstCholMatrixPtr &pmat, CholMatrixType &tmp)
+ {
+ tmp = input;
+ pmat = &tmp;
+ }
+ };
+
+ template<typename MatrixType>
+ struct simplicial_cholesky_grab_input<MatrixType,MatrixType> {
+ typedef MatrixType const * ConstMatrixPtr;
+ static void run(const MatrixType& input, ConstMatrixPtr &pmat, MatrixType &/*tmp*/)
+ {
+ pmat = &input;
+ }
+ };
+} // end namespace internal
+
/** \ingroup SparseCholesky_Module
 * \brief A direct sparse Cholesky factorization
*
@@ -33,8 +54,11 @@ enum SimplicialCholeskyMode {
*
*/
template<typename Derived>
-class SimplicialCholeskyBase : internal::noncopyable
+class SimplicialCholeskyBase : public SparseSolverBase<Derived>
{
+ typedef SparseSolverBase<Derived> Base;
+ using Base::m_isInitialized;
+
public:
typedef typename internal::traits<Derived>::MatrixType MatrixType;
typedef typename internal::traits<Derived>::OrderingType OrderingType;
@@ -43,17 +67,20 @@ class SimplicialCholeskyBase : internal::noncopyable
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
typedef SparseMatrix<Scalar,ColMajor,Index> CholMatrixType;
+ typedef CholMatrixType const * ConstCholMatrixPtr;
typedef Matrix<Scalar,Dynamic,1> VectorType;
public:
+
+ using Base::derived;
/** Default constructor */
SimplicialCholeskyBase()
- : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1)
+ : m_info(Success), m_shiftOffset(0), m_shiftScale(1)
{}
- SimplicialCholeskyBase(const MatrixType& matrix)
- : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1)
+ explicit SimplicialCholeskyBase(const MatrixType& matrix)
+ : m_info(Success), m_shiftOffset(0), m_shiftScale(1)
{
derived().compute(matrix);
}
@@ -79,34 +106,6 @@ class SimplicialCholeskyBase : internal::noncopyable
return m_info;
}
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<SimplicialCholeskyBase, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized.");
- eigen_assert(rows()==b.rows()
- && "SimplicialCholeskyBase::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<SimplicialCholeskyBase, Rhs>(*this, b.derived());
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<SimplicialCholeskyBase, Rhs>
- solve(const SparseMatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized.");
- eigen_assert(rows()==b.rows()
- && "SimplicialCholesky::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<SimplicialCholeskyBase, Rhs>(*this, b.derived());
- }
-
/** \returns the permutation P
* \sa permutationPinv() */
const PermutationMatrix<Dynamic,Dynamic,Index>& permutationP() const
@@ -150,7 +149,7 @@ class SimplicialCholeskyBase : internal::noncopyable
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
{
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
eigen_assert(m_matrix.rows()==b.rows());
@@ -175,6 +174,12 @@ class SimplicialCholeskyBase : internal::noncopyable
if(m_P.size()>0)
dest = m_Pinv * dest;
}
+
+ template<typename Rhs,typename Dest>
+ void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const
+ {
+ internal::solve_sparse_through_dense_panels(derived(), b, dest);
+ }
#endif // EIGEN_PARSED_BY_DOXYGEN
@@ -186,10 +191,11 @@ class SimplicialCholeskyBase : internal::noncopyable
{
eigen_assert(matrix.rows()==matrix.cols());
Index size = matrix.cols();
- CholMatrixType ap(size,size);
- ordering(matrix, ap);
- analyzePattern_preordered(ap, DoLDLT);
- factorize_preordered<DoLDLT>(ap);
+ CholMatrixType tmp(size,size);
+ ConstCholMatrixPtr pmat;
+ ordering(matrix, pmat, tmp);
+ analyzePattern_preordered(*pmat, DoLDLT);
+ factorize_preordered<DoLDLT>(*pmat);
}
template<bool DoLDLT>
@@ -197,9 +203,21 @@ class SimplicialCholeskyBase : internal::noncopyable
{
eigen_assert(a.rows()==a.cols());
int size = a.cols();
- CholMatrixType ap(size,size);
- ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
- factorize_preordered<DoLDLT>(ap);
+ CholMatrixType tmp(size,size);
+ ConstCholMatrixPtr pmat;
+
+ if(m_P.size()==0 && (UpLo&Upper)==Upper)
+ {
+ // If there is no ordering, try to directly use the input matrix without any copy
+ internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, tmp);
+ }
+ else
+ {
+ tmp.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
+ pmat = &tmp;
+ }
+
+ factorize_preordered<DoLDLT>(*pmat);
}
template<bool DoLDLT>
@@ -209,13 +227,14 @@ class SimplicialCholeskyBase : internal::noncopyable
{
eigen_assert(a.rows()==a.cols());
int size = a.cols();
- CholMatrixType ap(size,size);
- ordering(a, ap);
- analyzePattern_preordered(ap,doLDLT);
+ CholMatrixType tmp(size,size);
+ ConstCholMatrixPtr pmat;
+ ordering(a, pmat, tmp);
+ analyzePattern_preordered(*pmat,doLDLT);
}
void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT);
- void ordering(const MatrixType& a, CholMatrixType& ap);
+ void ordering(const MatrixType& a, ConstCholMatrixPtr &pmat, CholMatrixType& ap);
/** keeps off-diagonal entries; drops diagonal entries */
struct keep_diag {
@@ -226,7 +245,6 @@ class SimplicialCholeskyBase : internal::noncopyable
};
mutable ComputationInfo m_info;
- bool m_isInitialized;
bool m_factorizationIsOk;
bool m_analysisIsOk;
@@ -255,10 +273,10 @@ template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<Simp
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef SparseMatrix<Scalar, ColMajor, Index> CholMatrixType;
- typedef SparseTriangularView<CholMatrixType, Eigen::Lower> MatrixL;
- typedef SparseTriangularView<typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return m; }
- static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+ typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL;
+ typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU;
+ static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
+ static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
};
template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> >
@@ -269,10 +287,10 @@ template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<Simpl
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef SparseMatrix<Scalar, ColMajor, Index> CholMatrixType;
- typedef SparseTriangularView<CholMatrixType, Eigen::UnitLower> MatrixL;
- typedef SparseTriangularView<typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return m; }
- static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+ typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL;
+ typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
+ static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
+ static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
};
template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> >
@@ -321,7 +339,7 @@ public:
/** Default constructor */
SimplicialLLT() : Base() {}
/** Constructs and performs the LLT factorization of \a matrix */
- SimplicialLLT(const MatrixType& matrix)
+ explicit SimplicialLLT(const MatrixType& matrix)
: Base(matrix) {}
/** \returns an expression of the factor L */
@@ -411,7 +429,7 @@ public:
SimplicialLDLT() : Base() {}
/** Constructs and performs the LLT factorization of \a matrix */
- SimplicialLDLT(const MatrixType& matrix)
+ explicit SimplicialLDLT(const MatrixType& matrix)
: Base(matrix) {}
/** \returns a vector expression of the diagonal D */
@@ -491,7 +509,7 @@ public:
public:
SimplicialCholesky() : Base(), m_LDLT(true) {}
- SimplicialCholesky(const MatrixType& matrix)
+ explicit SimplicialCholesky(const MatrixType& matrix)
: Base(), m_LDLT(true)
{
compute(matrix);
@@ -560,7 +578,7 @@ public:
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
{
eigen_assert(Base::m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
eigen_assert(Base::m_matrix.rows()==b.rows());
@@ -596,6 +614,13 @@ public:
dest = Base::m_Pinv * dest;
}
+ /** \internal */
+ template<typename Rhs,typename Dest>
+ void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const
+ {
+ internal::solve_sparse_through_dense_panels(*this, b, dest);
+ }
+
Scalar determinant() const
{
if(m_LDLT)
@@ -614,58 +639,43 @@ public:
};
template<typename Derived>
-void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixType& ap)
+void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMatrixPtr &pmat, CholMatrixType& ap)
{
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
- // Note that amd compute the inverse permutation
+ pmat = &ap;
+ // Note that ordering methods compute the inverse permutation
+ if(!internal::is_same<OrderingType,NaturalOrdering<Index> >::value)
{
- CholMatrixType C;
- C = a.template selfadjointView<UpLo>();
+ {
+ CholMatrixType C;
+ C = a.template selfadjointView<UpLo>();
+
+ OrderingType ordering;
+ ordering(C,m_Pinv);
+ }
+
+ if(m_Pinv.size()>0) m_P = m_Pinv.inverse();
+ else m_P.resize(0);
- OrderingType ordering;
- ordering(C,m_Pinv);
+ ap.resize(size,size);
+ ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
}
-
- if(m_Pinv.size()>0)
- m_P = m_Pinv.inverse();
else
+ {
+ m_Pinv.resize(0);
m_P.resize(0);
-
- ap.resize(size,size);
- ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
+ if(UpLo==Lower || MatrixType::IsRowMajor)
+ {
+      // we have to transpose the lower part to the upper one
+ ap.resize(size,size);
+ ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>();
+ }
+ else
+ internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, ap);
+ }
}
-namespace internal {
-
-template<typename Derived, typename Rhs>
-struct solve_retval<SimplicialCholeskyBase<Derived>, Rhs>
- : solve_retval_base<SimplicialCholeskyBase<Derived>, Rhs>
-{
- typedef SimplicialCholeskyBase<Derived> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec().derived()._solve(rhs(),dst);
- }
-};
-
-template<typename Derived, typename Rhs>
-struct sparse_solve_retval<SimplicialCholeskyBase<Derived>, Rhs>
- : sparse_solve_retval_base<SimplicialCholeskyBase<Derived>, Rhs>
-{
- typedef SimplicialCholeskyBase<Derived> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_SIMPLICIAL_CHOLESKY_H
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
index 7aaf702be..b7fd62faa 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@@ -126,7 +126,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
Index top = size; // stack for pattern is empty
tags[k] = k; // mark node k as visited
m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L
- for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
+ for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
{
Index i = it.index();
if(i <= k)
diff --git a/Eigen/src/SparseCore/AmbiVector.h b/Eigen/src/SparseCore/AmbiVector.h
index 17fff96a7..76ef25f7d 100644
--- a/Eigen/src/SparseCore/AmbiVector.h
+++ b/Eigen/src/SparseCore/AmbiVector.h
@@ -27,7 +27,7 @@ class AmbiVector
typedef _Index Index;
typedef typename NumTraits<Scalar>::Real RealScalar;
- AmbiVector(Index size)
+ explicit AmbiVector(Index size)
: m_buffer(0), m_zero(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
{
resize(size);
@@ -69,7 +69,7 @@ class AmbiVector
delete[] m_buffer;
if (size<1000)
{
- Index allocSize = (size * sizeof(ListEl))/sizeof(Scalar);
+ Index allocSize = (size * sizeof(ListEl) + sizeof(Scalar) - 1)/sizeof(Scalar);
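+        // round up: e.g. with sizeof(ListEl)==12 and sizeof(Scalar)==8 (hypothetical
+        // sizes), size==1 needs 12 bytes; the former truncating division allocated a
+        // single 8-byte Scalar, whereas (12+7)/8 == 2 Scalars are enough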
m_allocatedElements = (allocSize*sizeof(Scalar))/sizeof(ListEl);
m_buffer = new Scalar[allocSize];
}
@@ -88,7 +88,7 @@ class AmbiVector
Index copyElements = m_allocatedElements;
m_allocatedElements = (std::min)(Index(m_allocatedElements*1.5),m_size);
Index allocSize = m_allocatedElements * sizeof(ListEl);
- allocSize = allocSize/sizeof(Scalar) + (allocSize%sizeof(Scalar)>0?1:0);
+ allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);
Scalar* newBuffer = new Scalar[allocSize];
memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
delete[] m_buffer;
@@ -288,7 +288,7 @@ class AmbiVector<_Scalar,_Index>::Iterator
* In practice, all coefficients having a magnitude smaller than \a epsilon
* are skipped.
*/
- Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0)
+ explicit Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0)
: m_vector(vec)
{
using std::abs;
diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h
index a667cb56e..99f741138 100644
--- a/Eigen/src/SparseCore/CompressedStorage.h
+++ b/Eigen/src/SparseCore/CompressedStorage.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -36,7 +36,7 @@ class CompressedStorage
: m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)
{}
- CompressedStorage(size_t size)
+ explicit CompressedStorage(size_t size)
: m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)
{
resize(size);
@@ -108,15 +108,6 @@ class CompressedStorage
inline Index& index(size_t i) { return m_indices[i]; }
inline const Index& index(size_t i) const { return m_indices[i]; }
- static CompressedStorage Map(Index* indices, Scalar* values, size_t size)
- {
- CompressedStorage res;
- res.m_indices = indices;
- res.m_values = values;
- res.m_allocatedSize = res.m_size = size;
- return res;
- }
-
/** \returns the largest \c k such that for all \c j in [0,k) index[\c j]\<\a key */
inline Index searchLowerIndex(Index key) const
{
@@ -152,10 +143,10 @@ class CompressedStorage
}
/** Like at(), but the search is performed in the range [start,end) */
- inline Scalar atInRange(size_t start, size_t end, Index key, const Scalar& defaultValue = Scalar(0)) const
+ inline Scalar atInRange(size_t start, size_t end, Index key, const Scalar &defaultValue = Scalar(0)) const
{
if (start>=end)
- return Scalar(0);
+ return defaultValue;
else if (end>start && key==m_indices[end-1])
return m_values[end-1];
// ^^ optimization: let's first check if it is the last coefficient
@@ -172,12 +163,31 @@ class CompressedStorage
size_t id = searchLowerIndex(0,m_size,key);
if (id>=m_size || m_indices[id]!=key)
{
- resize(m_size+1,1);
- for (size_t j=m_size-1; j>id; --j)
+ if (m_allocatedSize<m_size+1)
{
- m_indices[j] = m_indices[j-1];
- m_values[j] = m_values[j-1];
+ m_allocatedSize = 2*(m_size+1);
+ internal::scoped_array<Scalar> newValues(m_allocatedSize);
+ internal::scoped_array<Index> newIndices(m_allocatedSize);
+
+ // copy first chunk
+ internal::smart_copy(m_values, m_values +id, newValues.ptr());
+ internal::smart_copy(m_indices, m_indices+id, newIndices.ptr());
+
+ // copy the rest
+ if(m_size>id)
+ {
+ internal::smart_copy(m_values +id, m_values +m_size, newValues.ptr() +id+1);
+ internal::smart_copy(m_indices+id, m_indices+m_size, newIndices.ptr()+id+1);
+ }
+ std::swap(m_values,newValues.ptr());
+ std::swap(m_indices,newIndices.ptr());
}
+ else if(m_size>id)
+ {
+ internal::smart_memmove(m_values +id, m_values +m_size, m_values +id+1);
+ internal::smart_memmove(m_indices+id, m_indices+m_size, m_indices+id+1);
+ }
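+          // in both branches the slot at position 'id' has been left free for the new entry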
+ m_size++;
m_indices[id] = key;
m_values[id] = defaultValue;
}
@@ -204,17 +214,14 @@ class CompressedStorage
inline void reallocate(size_t size)
{
- Scalar* newValues = new Scalar[size];
- Index* newIndices = new Index[size];
+ eigen_internal_assert(size!=m_allocatedSize);
+ internal::scoped_array<Scalar> newValues(size);
+ internal::scoped_array<Index> newIndices(size);
size_t copySize = (std::min)(size, m_size);
- // copy
- internal::smart_copy(m_values, m_values+copySize, newValues);
- internal::smart_copy(m_indices, m_indices+copySize, newIndices);
- // delete old stuff
- delete[] m_values;
- delete[] m_indices;
- m_values = newValues;
- m_indices = newIndices;
+ internal::smart_copy(m_values, m_values+copySize, newValues.ptr());
+ internal::smart_copy(m_indices, m_indices+copySize, newIndices.ptr());
+ std::swap(m_values,newValues.ptr());
+ std::swap(m_indices,newIndices.ptr());
m_allocatedSize = size;
}
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 67bc33a93..a30522ff7 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -38,6 +38,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
// per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
+
+ typename evaluator<Lhs>::type lhsEval(lhs);
+ typename evaluator<Rhs>::type rhsEval(rhs);
res.setZero();
res.reserve(Index(estimated_nnz_prod));
@@ -47,11 +50,11 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
res.startVec(j);
Index nnz = 0;
- for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+ for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
Scalar y = rhsIt.value();
Index k = rhsIt.index();
- for (typename Lhs::InnerIterator lhsIt(lhs, k); lhsIt; ++lhsIt)
+ for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{
Index i = lhsIt.index();
Scalar x = lhsIt.value();
@@ -88,7 +91,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
// otherwise => loop through the entire vector
    // In order to avoid performing an expensive log2 when the
    // result is clearly very sparse, we use a linear bound up to 200.
- if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
+ if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
{
if(nnz>1) std::sort(indices,indices+nnz);
for(Index k=0; k<nnz; ++k)
@@ -138,6 +141,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrixAux;
typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime>::type ColMajorMatrix;
+  // If the result is tall and thin (in the extreme case a column vector)
+  // then it is faster to sort the coefficients in place instead of transposing twice.
// FIXME, the following heuristic is probably not very good.
if(lhs.rows()>=rhs.cols())
{
diff --git a/Eigen/src/SparseCore/MappedSparseMatrix.h b/Eigen/src/SparseCore/MappedSparseMatrix.h
index ab1a266a9..2852c669a 100644
--- a/Eigen/src/SparseCore/MappedSparseMatrix.h
+++ b/Eigen/src/SparseCore/MappedSparseMatrix.h
@@ -176,6 +176,32 @@ class MappedSparseMatrix<Scalar,_Flags,_Index>::ReverseInnerIterator
const Index m_end;
};
+namespace internal {
+
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<MappedSparseMatrix<_Scalar,_Options,_Index> >
+ : evaluator_base<MappedSparseMatrix<_Scalar,_Options,_Index> >
+{
+ typedef MappedSparseMatrix<_Scalar,_Options,_Index> MappedSparseMatrixType;
+ typedef typename MappedSparseMatrixType::InnerIterator InnerIterator;
+ typedef typename MappedSparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
+
+ enum {
+ CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+ Flags = MappedSparseMatrixType::Flags
+ };
+
+ evaluator() : m_matrix(0) {}
+ explicit evaluator(const MappedSparseMatrixType &mat) : m_matrix(&mat) {}
+
+ operator MappedSparseMatrixType&() { return m_matrix->const_cast_derived(); }
+ operator const MappedSparseMatrixType&() const { return *m_matrix; }
+
+ const MappedSparseMatrixType *m_matrix;
+};
+
+}
+
} // end namespace Eigen
#endif // EIGEN_MAPPED_SPARSEMATRIX_H
diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h
new file mode 100644
index 000000000..97c079d3f
--- /dev/null
+++ b/Eigen/src/SparseCore/SparseAssign.h
@@ -0,0 +1,192 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSEASSIGN_H
+#define EIGEN_SPARSEASSIGN_H
+
+namespace Eigen {
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
+{
+ // TODO use the evaluator mechanism
+ other.derived().evalTo(derived());
+ return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& SparseMatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+ // TODO use the evaluator mechanism
+ other.evalTo(derived());
+ return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other)
+{
+  // FIXME, by default sparse evaluation does not alias, so we should be able to bypass the generic call_assignment
+ internal::call_assignment/*_no_alias*/(derived(), other.derived());
+ return derived();
+}
+
+template<typename Derived>
+inline Derived& SparseMatrixBase<Derived>::operator=(const Derived& other)
+{
+ internal::call_assignment_no_alias(derived(), other.derived());
+ return derived();
+}
+
+namespace internal {
+
+template<>
+struct storage_kind_to_evaluator_kind<Sparse> {
+ typedef IteratorBased Kind;
+};
+
+template<>
+struct storage_kind_to_shape<Sparse> {
+ typedef SparseShape Shape;
+};
+
+struct Sparse2Sparse {};
+struct Sparse2Dense {};
+
+template<> struct AssignmentKind<SparseShape, SparseShape> { typedef Sparse2Sparse Kind; };
+template<> struct AssignmentKind<SparseShape, SparseTriangularShape> { typedef Sparse2Sparse Kind; };
+template<> struct AssignmentKind<DenseShape, SparseShape> { typedef Sparse2Dense Kind; };
+
+
+template<typename DstXprType, typename SrcXprType>
+void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src)
+{
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ typedef typename DstXprType::Index Index;
+ typedef typename DstXprType::Scalar Scalar;
+ typedef typename internal::evaluator<DstXprType>::type DstEvaluatorType;
+ typedef typename internal::evaluator<SrcXprType>::type SrcEvaluatorType;
+
+ SrcEvaluatorType srcEvaluator(src);
+
+ const bool transpose = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit);
+ const Index outerEvaluationSize = (SrcEvaluatorType::Flags&RowMajorBit) ? src.rows() : src.cols();
+ if ((!transpose) && src.isRValue())
+ {
+ // eval without temporary
+ dst.resize(src.rows(), src.cols());
+ dst.setZero();
+ dst.reserve((std::max)(src.rows(),src.cols())*2);
+ for (Index j=0; j<outerEvaluationSize; ++j)
+ {
+ dst.startVec(j);
+ for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it)
+ {
+ Scalar v = it.value();
+ dst.insertBackByOuterInner(j,it.index()) = v;
+ }
+ }
+ dst.finalize();
+ }
+ else
+ {
+ // eval through a temporary
+ eigen_assert(( ((internal::traits<DstXprType>::SupportedAccessPatterns & OuterRandomAccessPattern)==OuterRandomAccessPattern) ||
+ (!((DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit)))) &&
+ "the transpose operation is supposed to be handled in SparseMatrix::operator=");
+
+ enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) };
+
+
+ DstXprType temp(src.rows(), src.cols());
+
+ temp.reserve((std::max)(src.rows(),src.cols())*2);
+ for (Index j=0; j<outerEvaluationSize; ++j)
+ {
+ temp.startVec(j);
+ for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it)
+ {
+ Scalar v = it.value();
+ temp.insertBackByOuterInner(Flip?it.index():j,Flip?j:it.index()) = v;
+ }
+ }
+ temp.finalize();
+
+ dst = temp.markAsRValue();
+ }
+}
+
+// Generic Sparse to Sparse assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse, Scalar>
+{
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+
+ assign_sparse_to_sparse(dst.derived(), src.derived());
+ }
+};
+
+// Sparse to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense, Scalar>
+{
+ static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ typedef typename SrcXprType::Index Index;
+
+ typename internal::evaluator<SrcXprType>::type srcEval(src);
+ typename internal::evaluator<DstXprType>::type dstEval(dst);
+ const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
+ for (Index j=0; j<outerEvaluationSize; ++j)
+ for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
+ func.assignCoeff(dstEval.coeffRef(i.row(),i.col()), i.value());
+ }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense, Scalar>
+{
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ typedef typename SrcXprType::Index Index;
+
+ dst.setZero();
+ typename internal::evaluator<SrcXprType>::type srcEval(src);
+ typename internal::evaluator<DstXprType>::type dstEval(dst);
+ const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
+ for (Index j=0; j<outerEvaluationSize; ++j)
+ for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
+ dstEval.coeffRef(i.row(),i.col()) = i.value();
+ }
+};
+
+// Specialization for "dst = dec.solve(rhs)"
+// NOTE we need to specialize it for Sparse2Sparse to avoid an ambiguous specialization error
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Sparse2Sparse, Scalar>
+{
+ typedef Solve<DecType,RhsType> SrcXprType;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+ {
+ src.dec()._solve_impl(src.rhs(), dst);
+ }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPARSEASSIGN_H
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index 491cc72b0..9e4da2057 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,6 +12,7 @@
namespace Eigen {
+// Subset of columns or rows
template<typename XprType, int BlockRows, int BlockCols>
class BlockImpl<XprType,BlockRows,BlockCols,true,Sparse>
: public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,true> >
@@ -24,31 +25,6 @@ protected:
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
-
- class InnerIterator: public XprType::InnerIterator
- {
- typedef typename BlockImpl::Index Index;
- public:
- inline InnerIterator(const BlockType& xpr, Index outer)
- : XprType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
- class ReverseInnerIterator: public XprType::ReverseInnerIterator
- {
- typedef typename BlockImpl::Index Index;
- public:
- inline ReverseInnerIterator(const BlockType& xpr, Index outer)
- : XprType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
inline BlockImpl(const XprType& xpr, Index i)
: m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
@@ -63,13 +39,21 @@ public:
Index nonZeros() const
{
+ typedef typename internal::evaluator<XprType>::type EvaluatorType;
+ EvaluatorType matEval(m_matrix);
Index nnz = 0;
Index end = m_outerStart + m_outerSize.value();
- for(Index j=m_outerStart; j<end; ++j)
- for(typename XprType::InnerIterator it(m_matrix, j); it; ++it)
+ for(int j=m_outerStart; j<end; ++j)
+ for(typename EvaluatorType::InnerIterator it(matEval, j); it; ++it)
++nnz;
return nnz;
}
+
+ inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
+ Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
+ Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
+ Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
protected:
@@ -100,29 +84,6 @@ public:
protected:
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:
-
- class InnerIterator: public SparseMatrixType::InnerIterator
- {
- public:
- inline InnerIterator(const BlockType& xpr, Index outer)
- : SparseMatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
- class ReverseInnerIterator: public SparseMatrixType::ReverseInnerIterator
- {
- public:
- inline ReverseInnerIterator(const BlockType& xpr, Index outer)
- : SparseMatrixType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i)
: m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
@@ -248,6 +209,12 @@ public:
EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
+
+ inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
+ Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
+ Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
+ Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
protected:
@@ -407,32 +374,11 @@ public:
}
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ Index startRow() const { return m_startRow.value(); }
+ Index startCol() const { return m_startCol.value(); }
+ Index blockRows() const { return m_blockRows.value(); }
+ Index blockCols() const { return m_blockCols.value(); }
- typedef internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel> InnerIterator;
-
- class ReverseInnerIterator : public _MatrixTypeNested::ReverseInnerIterator
- {
- typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
- const BlockType& m_block;
- Index m_begin;
- public:
-
- EIGEN_STRONG_INLINE ReverseInnerIterator(const BlockType& block, Index outer)
- : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
- m_block(block),
- m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value())
- {
- while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) )
- Base::operator--();
- }
-
- inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
- inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
- inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
- inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
-
- inline operator bool() const { return Base::operator bool() && Base::index() >= m_begin; }
- };
protected:
friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
friend class ReverseInnerIterator;
@@ -497,7 +443,7 @@ namespace internal {
Index m_end;
public:
- EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
+ explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
:
m_block(block),
m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
@@ -520,10 +466,8 @@ namespace internal {
inline GenericSparseBlockInnerIteratorImpl& operator++()
{
// search next non-zero entry
- while(m_outerPos<m_end)
+ while(++m_outerPos<m_end)
{
- m_outerPos++;
- if(m_outerPos==m_end) break;
typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
// search for the key m_innerIndex in the current outer-vector
while(it && it.index() < m_innerIndex) ++it;
@@ -538,7 +482,119 @@ namespace internal {
inline operator bool() const { return m_outerPos < m_end; }
};
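
The rewritten operator++ above fuses the increment and the end test into the loop condition; the visit order is unchanged. A minimal standalone sketch of the equivalence (plain ints, with a divisibility test standing in for "found a non-zero entry"):

#include <cassert>

// Old form: increment inside the body, explicit break on the end sentinel.
int advance_old(int pos, int end) {
    while (pos < end) {
        ++pos;
        if (pos == end) break;
        if (pos % 3 == 0) return pos;   // stand-in for "found a non-zero"
    }
    return end;                          // exhausted
}

// New form: increment fused into the condition, same positions visited.
int advance_new(int pos, int end) {
    while (++pos < end) {
        if (pos % 3 == 0) return pos;
    }
    return end;
}

int main() {
    for (int end = 0; end < 20; ++end)
        for (int pos = -1; pos < end; ++pos)
            assert(advance_old(pos, end) == advance_new(pos, end));
}
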
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
+ : public evaluator_base<Block<ArgType,BlockRows,BlockCols,InnerPanel> >
+{
+ class InnerVectorInnerIterator;
+ class OuterVectorInnerIterator;
+ public:
+ typedef Block<ArgType,BlockRows,BlockCols,InnerPanel> XprType;
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+
+ class ReverseInnerIterator;
+
+ enum {
+ IsRowMajor = XprType::IsRowMajor,
+
+ OuterVector = (BlockCols==1 && ArgType::IsRowMajor)
+ | // FIXME: bitwise | instead of logical || to silence GCC 4.4.0's spurious
+ // "suggest parentheses around &&" warning; revert to || once that compiler is no longer supported.
+ (BlockRows==1 && !ArgType::IsRowMajor),
+
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = XprType::Flags
+ };
+
+ typedef typename internal::conditional<OuterVector,OuterVectorInnerIterator,InnerVectorInnerIterator>::type InnerIterator;
+
+ explicit unary_evaluator(const XprType& op)
+ : m_argImpl(op.nestedExpression()), m_block(op)
+ {}
+
+ protected:
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const XprType &m_block;
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::InnerVectorInnerIterator
+ : public EvalIterator
+{
+ const XprType& m_block;
+ Index m_end;
+public:
+
+ EIGEN_STRONG_INLINE InnerVectorInnerIterator(const unary_evaluator& aEval, Index outer)
+ : EvalIterator(aEval.m_argImpl, outer + (IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol())),
+ m_block(aEval.m_block),
+ m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
+ {
+ while( (EvalIterator::operator bool()) && (EvalIterator::index() < (IsRowMajor ? m_block.startCol() : m_block.startRow())) )
+ EvalIterator::operator++();
+ }
+
+ inline Index index() const { return EvalIterator::index() - (IsRowMajor ? m_block.startCol() : m_block.startRow()); }
+ inline Index outer() const { return EvalIterator::outer() - (IsRowMajor ? m_block.startRow() : m_block.startCol()); }
+ inline Index row() const { return EvalIterator::row() - m_block.startRow(); }
+ inline Index col() const { return EvalIterator::col() - m_block.startCol(); }
+
+ inline operator bool() const { return EvalIterator::operator bool() && EvalIterator::index() < m_end; }
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::OuterVectorInnerIterator
+{
+ const unary_evaluator& m_eval;
+ Index m_outerPos;
+ Index m_innerIndex;
+ Scalar m_value;
+ Index m_end;
+public:
+
+ EIGEN_STRONG_INLINE OuterVectorInnerIterator(const unary_evaluator& aEval, Index outer)
+ : m_eval(aEval),
+ m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
+ m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
+ m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
+ {
+ EIGEN_UNUSED_VARIABLE(outer);
+ eigen_assert(outer==0);
+
+ ++(*this);
+ }
+
+ inline Index index() const { return m_outerPos - (IsRowMajor ? m_eval.m_block.startCol() : m_eval.m_block.startRow()); }
+ inline Index outer() const { return 0; }
+ inline Index row() const { return IsRowMajor ? 0 : index(); }
+ inline Index col() const { return IsRowMajor ? index() : 0; }
+ inline Scalar value() const { return m_value; }
+
+ inline OuterVectorInnerIterator& operator++()
+ {
+ // search next non-zero entry
+ while(++m_outerPos<m_end)
+ {
+ EvalIterator it(m_eval.m_argImpl, m_outerPos);
+ // search for the key m_innerIndex in the current outer-vector
+ while(it && it.index() < m_innerIndex) ++it;
+ if(it && it.index()==m_innerIndex)
+ {
+ m_value = it.value();
+ break;
+ }
+ }
+ return *this;
+ }
+
+ inline operator bool() const { return m_outerPos < m_end; }
+};
+
} // end namespace internal
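
The new nonZeros() builds an evaluator once and then iterates it one outer vector at a time. The same idiom against a plain SparseMatrix looks as follows; internal::evaluator is an internal, still-moving API in this branch, so treat this as a sketch of the pattern rather than a stable usage recipe:

#include <Eigen/SparseCore>
#include <iostream>

int main() {
    Eigen::SparseMatrix<double> A(4, 4);
    A.insert(0, 1) = 1.0;
    A.insert(2, 3) = 2.0;
    A.insert(3, 3) = 3.0;
    A.makeCompressed();

    // Build the evaluator once, then iterate it per outer vector,
    // exactly as the new nonZeros() does.
    typedef Eigen::internal::evaluator<Eigen::SparseMatrix<double> >::type EvaluatorType;
    EvaluatorType matEval(A);
    int nnz = 0;
    for (int j = 0; j < A.outerSize(); ++j)
        for (EvaluatorType::InnerIterator it(matEval, j); it; ++it)
            ++nnz;
    std::cout << nnz << " == " << A.nonZeros() << std::endl;  // 3 == 3
}
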
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
index 60fdd214a..94ca9b1a4 100644
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -31,12 +31,6 @@ namespace Eigen {
namespace internal {
-template<> struct promote_storage_type<Dense,Sparse>
-{ typedef Sparse ret; };
-
-template<> struct promote_storage_type<Sparse,Dense>
-{ typedef Sparse ret; };
-
template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived,
typename _LhsStorageMode = typename traits<Lhs>::StorageKind,
typename _RhsStorageMode = typename traits<Rhs>::StorageKind>
@@ -44,71 +38,35 @@ class sparse_cwise_binary_op_inner_iterator_selector;
} // end namespace internal
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
- : public SparseMatrixBase<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
-{
- public:
- class InnerIterator;
- class ReverseInnerIterator;
- typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
- EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)
- CwiseBinaryOpImpl()
- {
- typedef typename internal::traits<Lhs>::StorageKind LhsStorageKind;
- typedef typename internal::traits<Rhs>::StorageKind RhsStorageKind;
- EIGEN_STATIC_ASSERT((
- (!internal::is_same<LhsStorageKind,RhsStorageKind>::value)
- || ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))),
- THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH);
- }
-};
-
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator
- : public internal::sparse_cwise_binary_op_inner_iterator_selector<BinaryOp,Lhs,Rhs,typename CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator>
-{
- public:
- typedef internal::sparse_cwise_binary_op_inner_iterator_selector<
- BinaryOp,Lhs,Rhs, InnerIterator> Base;
-
- EIGEN_STRONG_INLINE InnerIterator(const CwiseBinaryOpImpl& binOp, Index outer)
- : Base(binOp.derived(),outer)
- {}
-};
-
-/***************************************************************************
-* Implementation of inner-iterators
-***************************************************************************/
-
-// template<typename T> struct internal::func_is_conjunction { enum { ret = false }; };
-// template<typename T> struct internal::func_is_conjunction<internal::scalar_product_op<T> > { enum { ret = true }; };
-
-// TODO generalize the internal::scalar_product_op specialization to all conjunctions if any !
-
namespace internal {
-// sparse - sparse (generic)
-template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived, Sparse, Sparse>
+
+// Generic "sparse OP sparse"
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased>
+ : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
- typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> CwiseBinaryXpr;
- typedef typename traits<CwiseBinaryXpr>::Scalar Scalar;
- typedef typename traits<CwiseBinaryXpr>::Index Index;
- typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
- typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested;
- typedef typename _LhsNested::InnerIterator LhsIterator;
- typedef typename _RhsNested::InnerIterator RhsIterator;
+protected:
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
+ typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
+public:
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+ class ReverseInnerIterator;
+ class InnerIterator
+ {
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename XprType::Index Index;
public:
-
- EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
- : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor())
+
+ EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+ : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)
{
this->operator++();
}
- EIGEN_STRONG_INLINE Derived& operator++()
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
{
if (m_lhsIter && m_rhsIter && (m_lhsIter.index() == m_rhsIter.index()))
{
@@ -134,7 +92,7 @@ class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived
m_value = 0; // this is to avoid a compilation warning
m_id = -1;
}
- return *static_cast<Derived*>(this);
+ return *this;
}
EIGEN_STRONG_INLINE Scalar value() const { return m_value; }
@@ -151,24 +109,48 @@ class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived
const BinaryOp& m_functor;
Scalar m_value;
Index m_id;
+ };
+
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ Flags = XprType::Flags
+ };
+
+ explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs())
+ { }
+
+protected:
+ const BinaryOp m_functor;
+ typename evaluator<Lhs>::nestedType m_lhsImpl;
+ typename evaluator<Rhs>::nestedType m_rhsImpl;
};
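
The InnerIterator above walks the union of the two sorted index sequences, applying the functor to (lhs, 0) or (0, rhs) wherever only one side stores an entry. The same control flow on plain sorted arrays, with addition standing in for BinaryOp (sketch):

#include <cstdio>
#include <utility>
#include <vector>

// Union-merge of two sorted (index, value) sequences, as the generic
// "sparse OP sparse" InnerIterator does one step at a time.
std::vector<std::pair<int,double> >
merge_union(const std::vector<std::pair<int,double> >& a,
            const std::vector<std::pair<int,double> >& b) {
    std::vector<std::pair<int,double> > out;
    std::size_t i = 0, j = 0;
    while (i < a.size() && j < b.size()) {
        if (a[i].first == b[j].first) {                    // both sides hit
            out.push_back({a[i].first, a[i].second + b[j].second});
            ++i; ++j;
        } else if (a[i].first < b[j].first) {              // lhs only: functor(lhs, 0)
            out.push_back({a[i].first, a[i].second + 0.0});
            ++i;
        } else {                                           // rhs only: functor(0, rhs)
            out.push_back({b[j].first, 0.0 + b[j].second});
            ++j;
        }
    }
    for (; i < a.size(); ++i) out.push_back(a[i]);         // drain the remainders
    for (; j < b.size(); ++j) out.push_back(b[j]);
    return out;
}

int main() {
    std::vector<std::pair<int,double> > a = {{0, 1.0}, {3, 2.0}};
    std::vector<std::pair<int,double> > b = {{3, 4.0}, {5, 6.0}};
    for (auto& e : merge_union(a, b))
        std::printf("(%d, %g)\n", e.first, e.second);      // (0,1) (3,6) (5,6)
}
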
-// sparse - sparse (product)
-template<typename T, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Sparse, Sparse>
+// "sparse .* sparse"
+template<typename T, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased>
+ : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
{
- typedef scalar_product_op<T> BinaryFunc;
- typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
- typedef typename CwiseBinaryXpr::Scalar Scalar;
- typedef typename CwiseBinaryXpr::Index Index;
- typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
- typedef typename _LhsNested::InnerIterator LhsIterator;
- typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested;
- typedef typename _RhsNested::InnerIterator RhsIterator;
- public:
+protected:
+ typedef scalar_product_op<T> BinaryOp;
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
+ typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
+public:
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+ class ReverseInnerIterator;
+ class InnerIterator
+ {
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename XprType::Index Index;
- EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
- : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor())
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+ : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)
{
while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index()))
{
@@ -179,7 +161,7 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs,
}
}
- EIGEN_STRONG_INLINE Derived& operator++()
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
{
++m_lhsIter;
++m_rhsIter;
@@ -190,9 +172,9 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs,
else
++m_rhsIter;
}
- return *static_cast<Derived*>(this);
+ return *this;
}
-
+
EIGEN_STRONG_INLINE Scalar value() const { return m_functor(m_lhsIter.value(), m_rhsIter.value()); }
EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); }
@@ -204,91 +186,159 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs,
protected:
LhsIterator m_lhsIter;
RhsIterator m_rhsIter;
- const BinaryFunc& m_functor;
+ const BinaryOp& m_functor;
+ };
+
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ Flags = XprType::Flags
+ };
+
+ explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs())
+ { }
+
+protected:
+ const BinaryOp m_functor;
+ typename evaluator<Lhs>::nestedType m_lhsImpl;
+ typename evaluator<Rhs>::nestedType m_rhsImpl;
};
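
For the product specialization only coincident indices can yield a non-zero, so both the constructor and operator++ skip ahead until the two index streams agree. As a sorted intersection on plain arrays (sketch):

#include <cstdio>
#include <vector>

// Intersection-merge: only indices present on both sides survive,
// mirroring the "sparse .* sparse" iterator above.
int main() {
    std::vector<int>    ai = {0, 3, 5}, bi = {3, 5, 7};
    std::vector<double> av = {1, 2, 3}, bv = {10, 20, 30};
    std::size_t i = 0, j = 0;
    while (i < ai.size() && j < bi.size()) {
        if (ai[i] == bi[j]) {
            std::printf("(%d, %g)\n", ai[i], av[i] * bv[j]);  // (3,20) (5,60)
            ++i; ++j;
        }
        else if (ai[i] < bi[j]) ++i;   // advance whichever index is smaller
        else                    ++j;
    }
}
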
-// sparse - dense (product)
-template<typename T, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Sparse, Dense>
+// "dense .* sparse"
+template<typename T, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IndexBased, IteratorBased>
+ : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
{
- typedef scalar_product_op<T> BinaryFunc;
- typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
- typedef typename CwiseBinaryXpr::Scalar Scalar;
- typedef typename CwiseBinaryXpr::Index Index;
- typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
- typedef typename traits<CwiseBinaryXpr>::RhsNested RhsNested;
- typedef typename _LhsNested::InnerIterator LhsIterator;
- enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
- public:
+protected:
+ typedef scalar_product_op<T> BinaryOp;
+ typedef typename evaluator<Lhs>::type LhsEvaluator;
+ typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
+public:
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+ class ReverseInnerIterator;
+ class InnerIterator
+ {
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename XprType::Index Index;
+ enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
- EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
- : m_rhs(xpr.rhs()), m_lhsIter(xpr.lhs(),typename _LhsNested::Index(outer)), m_functor(xpr.functor()), m_outer(outer)
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+ : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_outer(outer)
{}
- EIGEN_STRONG_INLINE Derived& operator++()
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
{
- ++m_lhsIter;
- return *static_cast<Derived*>(this);
+ ++m_rhsIter;
+ return *this;
}
EIGEN_STRONG_INLINE Scalar value() const
- { return m_functor(m_lhsIter.value(),
- m_rhs.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }
+ { return m_functor(m_lhsEval.coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); }
- EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); }
- EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); }
- EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
+ EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); }
+ EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); }
+ EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
- EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
+ EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
protected:
- RhsNested m_rhs;
- LhsIterator m_lhsIter;
- const BinaryFunc m_functor;
+ const LhsEvaluator &m_lhsEval;
+ RhsIterator m_rhsIter;
+ const BinaryOp& m_functor;
const Index m_outer;
+ };
+
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ Flags = XprType::Flags
+ };
+
+ explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs())
+ { }
+
+protected:
+ const BinaryOp m_functor;
+ typename evaluator<Lhs>::nestedType m_lhsImpl;
+ typename evaluator<Rhs>::nestedType m_rhsImpl;
};
-// sparse - dense (product)
-template<typename T, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Dense, Sparse>
+// "sparse .* dense"
+template<typename T, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IndexBased>
+ : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
{
- typedef scalar_product_op<T> BinaryFunc;
- typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
- typedef typename CwiseBinaryXpr::Scalar Scalar;
- typedef typename CwiseBinaryXpr::Index Index;
- typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested;
- typedef typename _RhsNested::InnerIterator RhsIterator;
+protected:
+ typedef scalar_product_op<T> BinaryOp;
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
+ typedef typename evaluator<Rhs>::type RhsEvaluator;
+public:
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+ class ReverseInnerIterator;
+ class InnerIterator
+ {
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename XprType::Index Index;
+ enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
- enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
public:
-
- EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
- : m_xpr(xpr), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()), m_outer(outer)
+
+ EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+ : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_outer(outer)
{}
- EIGEN_STRONG_INLINE Derived& operator++()
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
{
- ++m_rhsIter;
- return *static_cast<Derived*>(this);
+ ++m_lhsIter;
+ return *this;
}
EIGEN_STRONG_INLINE Scalar value() const
- { return m_functor(m_xpr.lhs().coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); }
+ { return m_functor(m_lhsIter.value(),
+ m_rhsEval.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }
- EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); }
- EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); }
- EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
+ EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); }
+ EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); }
+ EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
- EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
+ EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
protected:
- const CwiseBinaryXpr& m_xpr;
- RhsIterator m_rhsIter;
- const BinaryFunc& m_functor;
+ LhsIterator m_lhsIter;
+ const RhsEvaluator &m_rhsEval;
+ const BinaryOp& m_functor;
const Index m_outer;
+ };
+
+
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ Flags = XprType::Flags
+ };
+
+ explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs())
+ { }
+
+protected:
+ const BinaryOp m_functor;
+ typename evaluator<Lhs>::nestedType m_lhsImpl;
+ typename evaluator<Rhs>::nestedType m_rhsImpl;
};
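
Both mixed-kind specializations iterate the sparse operand and fetch the dense operand by coefficient, so the result inherits the sparse pattern. At the user level (sketch; cwiseProduct is the entry point that builds this expression):

#include <Eigen/SparseCore>
#include <iostream>

int main() {
    Eigen::SparseMatrix<double> A(2, 2);
    A.insert(0, 0) = 2.0;
    A.insert(1, 1) = 3.0;
    A.makeCompressed();

    Eigen::MatrixXd D(2, 2);
    D << 5.0, 6.0,
         7.0, 8.0;

    // Sparse .* dense: the iterator walks A's non-zeros only and reads the
    // matching coefficient of D, so B has the same pattern as A.
    Eigen::SparseMatrix<double> B = A.cwiseProduct(D);
    std::cout << B.nonZeros() << "\n";                           // 2
    std::cout << B.coeff(0, 0) << " " << B.coeff(1, 1) << "\n";  // 10 24
}
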
-} // end namespace internal
+} // end namespace internal
/***************************************************************************
* Implementation of SparseMatrixBase and SparseCwise functions/operators
diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
index 5a50c7803..32b7bc949 100644
--- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,131 +12,154 @@
namespace Eigen {
-template<typename UnaryOp, typename MatrixType>
-class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>
- : public SparseMatrixBase<CwiseUnaryOp<UnaryOp, MatrixType> >
+namespace internal {
+
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
+ : public evaluator_base<CwiseUnaryOp<UnaryOp,ArgType> >
{
public:
+ typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
class InnerIterator;
- class ReverseInnerIterator;
-
- typedef CwiseUnaryOp<UnaryOp, MatrixType> Derived;
- EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)
+// class ReverseInnerIterator;
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+ Flags = XprType::Flags
+ };
+
+ explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
protected:
- typedef typename internal::traits<Derived>::_XprTypeNested _MatrixTypeNested;
- typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator;
- typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator;
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
+
+ const UnaryOp m_functor;
+ typename evaluator<ArgType>::nestedType m_argImpl;
};
-template<typename UnaryOp, typename MatrixType>
-class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::InnerIterator
- : public CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeIterator
+template<typename UnaryOp, typename ArgType>
+class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator
+ : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator
{
- typedef typename CwiseUnaryOpImpl::Scalar Scalar;
- typedef typename CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeIterator Base;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base;
public:
- EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer)
- : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+ EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+ : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor)
{}
EIGEN_STRONG_INLINE InnerIterator& operator++()
{ Base::operator++(); return *this; }
- EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); }
+ EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
protected:
const UnaryOp m_functor;
private:
- typename CwiseUnaryOpImpl::Scalar& valueRef();
+ Scalar& valueRef();
};
-template<typename UnaryOp, typename MatrixType>
-class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::ReverseInnerIterator
- : public CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeReverseIterator
-{
- typedef typename CwiseUnaryOpImpl::Scalar Scalar;
- typedef typename CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeReverseIterator Base;
- public:
-
- EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer)
- : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
- {}
-
- EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
- { Base::operator--(); return *this; }
-
- EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); }
-
- protected:
- const UnaryOp m_functor;
- private:
- typename CwiseUnaryOpImpl::Scalar& valueRef();
-};
-
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>
- : public SparseMatrixBase<CwiseUnaryView<ViewOp, MatrixType> >
+// template<typename UnaryOp, typename ArgType>
+// class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::ReverseInnerIterator
+// : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator
+// {
+// typedef typename XprType::Scalar Scalar;
+// typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
+// public:
+//
+// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
+// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+// {}
+//
+// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
+// { Base::operator--(); return *this; }
+//
+// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
+//
+// protected:
+// const UnaryOp m_functor;
+// private:
+// Scalar& valueRef();
+// };
+
+
+
+
+
+template<typename ViewOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
+ : public evaluator_base<CwiseUnaryView<ViewOp,ArgType> >
{
public:
+ typedef CwiseUnaryView<ViewOp, ArgType> XprType;
class InnerIterator;
class ReverseInnerIterator;
-
- typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
- EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+ Flags = XprType::Flags
+ };
+
+ explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
protected:
- typedef typename internal::traits<Derived>::_MatrixTypeNested _MatrixTypeNested;
- typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator;
- typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator;
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
+
+ const ViewOp m_functor;
+ typename evaluator<ArgType>::nestedType m_argImpl;
};
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::InnerIterator
- : public CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeIterator
+template<typename ViewOp, typename ArgType>
+class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator
+ : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator
{
- typedef typename CwiseUnaryViewImpl::Scalar Scalar;
- typedef typename CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeIterator Base;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base;
public:
- EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer)
- : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+ EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+ : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor)
{}
EIGEN_STRONG_INLINE InnerIterator& operator++()
{ Base::operator++(); return *this; }
- EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); }
- EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); }
+ EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
+ EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }
protected:
const ViewOp m_functor;
};
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::ReverseInnerIterator
- : public CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeReverseIterator
-{
- typedef typename CwiseUnaryViewImpl::Scalar Scalar;
- typedef typename CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeReverseIterator Base;
- public:
-
- EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer)
- : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
- {}
-
- EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
- { Base::operator--(); return *this; }
-
- EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); }
- EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); }
-
- protected:
- const ViewOp m_functor;
-};
+// template<typename ViewOp, typename ArgType>
+// class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::ReverseInnerIterator
+// : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator
+// {
+// typedef typename XprType::Scalar Scalar;
+// typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
+// public:
+//
+// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
+// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+// {}
+//
+// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
+// { Base::operator--(); return *this; }
+//
+// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
+// EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }
+//
+// protected:
+// const ViewOp m_functor;
+// };
+
+
+} // end namespace internal
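
These unary evaluators are thin decorators: InnerIterator is the argument's iterator with value() routed through the functor, so a sparse unary expression visits exactly the non-zeros of its argument. For instance (sketch):

#include <Eigen/SparseCore>
#include <iostream>

int main() {
    Eigen::SparseMatrix<double> A(3, 3);
    A.insert(0, 0) = -1.0;
    A.insert(2, 1) = -4.0;
    A.makeCompressed();

    // cwiseAbs() builds a CwiseUnaryOp; its sparse iterator visits exactly
    // the non-zeros of A and applies the functor to each value.
    Eigen::SparseMatrix<double> B = A.cwiseAbs();
    for (int j = 0; j < B.outerSize(); ++j)
        for (Eigen::SparseMatrix<double>::InnerIterator it(B, j); it; ++it)
            std::cout << "(" << it.row() << "," << it.col() << ") = "
                      << it.value() << "\n";   // 1 and 4
}
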
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h
index d40e966c1..5aea11425 100644
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,152 +12,10 @@
namespace Eigen {
-template<typename Lhs, typename Rhs, int InnerSize> struct SparseDenseProductReturnType
-{
- typedef SparseTimeDenseProduct<Lhs,Rhs> Type;
-};
-
-template<typename Lhs, typename Rhs> struct SparseDenseProductReturnType<Lhs,Rhs,1>
-{
- typedef typename internal::conditional<
- Lhs::IsRowMajor,
- SparseDenseOuterProduct<Rhs,Lhs,true>,
- SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type;
-};
-
-template<typename Lhs, typename Rhs, int InnerSize> struct DenseSparseProductReturnType
-{
- typedef DenseTimeSparseProduct<Lhs,Rhs> Type;
-};
-
-template<typename Lhs, typename Rhs> struct DenseSparseProductReturnType<Lhs,Rhs,1>
-{
- typedef typename internal::conditional<
- Rhs::IsRowMajor,
- SparseDenseOuterProduct<Rhs,Lhs,true>,
- SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type;
-};
-
namespace internal {
-template<typename Lhs, typename Rhs, bool Tr>
-struct traits<SparseDenseOuterProduct<Lhs,Rhs,Tr> >
-{
- typedef Sparse StorageKind;
- typedef typename scalar_product_traits<typename traits<Lhs>::Scalar,
- typename traits<Rhs>::Scalar>::ReturnType Scalar;
- typedef typename Lhs::Index Index;
- typedef typename Lhs::Nested LhsNested;
- typedef typename Rhs::Nested RhsNested;
- typedef typename remove_all<LhsNested>::type _LhsNested;
- typedef typename remove_all<RhsNested>::type _RhsNested;
-
- enum {
- LhsCoeffReadCost = traits<_LhsNested>::CoeffReadCost,
- RhsCoeffReadCost = traits<_RhsNested>::CoeffReadCost,
-
- RowsAtCompileTime = Tr ? int(traits<Rhs>::RowsAtCompileTime) : int(traits<Lhs>::RowsAtCompileTime),
- ColsAtCompileTime = Tr ? int(traits<Lhs>::ColsAtCompileTime) : int(traits<Rhs>::ColsAtCompileTime),
- MaxRowsAtCompileTime = Tr ? int(traits<Rhs>::MaxRowsAtCompileTime) : int(traits<Lhs>::MaxRowsAtCompileTime),
- MaxColsAtCompileTime = Tr ? int(traits<Lhs>::MaxColsAtCompileTime) : int(traits<Rhs>::MaxColsAtCompileTime),
-
- Flags = Tr ? RowMajorBit : 0,
-
- CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + NumTraits<Scalar>::MulCost
- };
-};
-
-} // end namespace internal
-
-template<typename Lhs, typename Rhs, bool Tr>
-class SparseDenseOuterProduct
- : public SparseMatrixBase<SparseDenseOuterProduct<Lhs,Rhs,Tr> >
-{
- public:
-
- typedef SparseMatrixBase<SparseDenseOuterProduct> Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(SparseDenseOuterProduct)
- typedef internal::traits<SparseDenseOuterProduct> Traits;
-
- private:
-
- typedef typename Traits::LhsNested LhsNested;
- typedef typename Traits::RhsNested RhsNested;
- typedef typename Traits::_LhsNested _LhsNested;
- typedef typename Traits::_RhsNested _RhsNested;
-
- public:
-
- class InnerIterator;
-
- EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Lhs& lhs, const Rhs& rhs)
- : m_lhs(lhs), m_rhs(rhs)
- {
- EIGEN_STATIC_ASSERT(!Tr,YOU_MADE_A_PROGRAMMING_MISTAKE);
- }
-
- EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Rhs& rhs, const Lhs& lhs)
- : m_lhs(lhs), m_rhs(rhs)
- {
- EIGEN_STATIC_ASSERT(Tr,YOU_MADE_A_PROGRAMMING_MISTAKE);
- }
-
- EIGEN_STRONG_INLINE Index rows() const { return Tr ? Index(m_rhs.rows()) : m_lhs.rows(); }
- EIGEN_STRONG_INLINE Index cols() const { return Tr ? m_lhs.cols() : Index(m_rhs.cols()); }
-
- EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
- EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
-
- protected:
- LhsNested m_lhs;
- RhsNested m_rhs;
-};
-
-template<typename Lhs, typename Rhs, bool Transpose>
-class SparseDenseOuterProduct<Lhs,Rhs,Transpose>::InnerIterator : public _LhsNested::InnerIterator
-{
- typedef typename _LhsNested::InnerIterator Base;
- typedef typename SparseDenseOuterProduct::Index Index;
- public:
- EIGEN_STRONG_INLINE InnerIterator(const SparseDenseOuterProduct& prod, Index outer)
- : Base(prod.lhs(), 0), m_outer(outer), m_empty(false), m_factor(get(prod.rhs(), outer, typename internal::traits<Rhs>::StorageKind() ))
- {}
-
- inline Index outer() const { return m_outer; }
- inline Index row() const { return Transpose ? m_outer : Base::index(); }
- inline Index col() const { return Transpose ? Base::index() : m_outer; }
-
- inline Scalar value() const { return Base::value() * m_factor; }
- inline operator bool() const { return Base::operator bool() && !m_empty; }
-
- protected:
- Scalar get(const _RhsNested &rhs, Index outer, Dense = Dense()) const
- {
- return rhs.coeff(outer);
- }
-
- Scalar get(const _RhsNested &rhs, Index outer, Sparse = Sparse())
- {
- typename Traits::_RhsNested::InnerIterator it(rhs, outer);
- if (it && it.index()==0 && it.value()!=Scalar(0))
- return it.value();
- m_empty = true;
- return Scalar(0);
- }
-
- Index m_outer;
- bool m_empty;
- Scalar m_factor;
-};
-
-namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<SparseTimeDenseProduct<Lhs,Rhs> >
- : traits<ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs> >
-{
- typedef Dense StorageKind;
- typedef MatrixXpr XprKind;
-};
+template <> struct product_promote_storage_type<Sparse,Dense, OuterProduct> { typedef Sparse ret; };
+template <> struct product_promote_storage_type<Dense,Sparse, OuterProduct> { typedef Sparse ret; };
template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,
typename AlphaType,
@@ -172,16 +30,17 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
typedef typename internal::remove_all<DenseResType>::type Res;
typedef typename Lhs::Index Index;
- typedef typename Lhs::InnerIterator LhsInnerIterator;
+ typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
{
+ typename evaluator<Lhs>::type lhsEval(lhs);
for(Index c=0; c<rhs.cols(); ++c)
{
Index n = lhs.outerSize();
for(Index j=0; j<n; ++j)
{
typename Res::Scalar tmp(0);
- for(LhsInnerIterator it(lhs,j); it ;++it)
+ for(LhsInnerIterator it(lhsEval,j); it ;++it)
tmp += it.value() * rhs.coeff(it.index(),c);
res.coeffRef(j,c) = alpha * tmp;
}
@@ -203,17 +62,18 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
typedef typename internal::remove_all<DenseResType>::type Res;
- typedef typename Lhs::InnerIterator LhsInnerIterator;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
{
+ typename evaluator<Lhs>::type lhsEval(lhs);
for(Index c=0; c<rhs.cols(); ++c)
{
for(Index j=0; j<lhs.outerSize(); ++j)
{
// typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
typename internal::scalar_product_traits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
- for(LhsInnerIterator it(lhs,j); it ;++it)
+ for(LhsInnerIterator it(lhsEval,j); it ;++it)
res.coeffRef(it.index(),c) += it.value() * rhs_j;
}
}
@@ -226,14 +86,15 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
typedef typename internal::remove_all<DenseResType>::type Res;
- typedef typename Lhs::InnerIterator LhsInnerIterator;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
{
+ typename evaluator<Lhs>::type lhsEval(lhs);
for(Index j=0; j<lhs.outerSize(); ++j)
{
typename Res::RowXpr res_j(res.row(j));
- for(LhsInnerIterator it(lhs,j); it ;++it)
+ for(LhsInnerIterator it(lhsEval,j); it ;++it)
res_j += (alpha*it.value()) * rhs.row(it.index());
}
}
@@ -245,14 +106,15 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
typedef typename internal::remove_all<DenseResType>::type Res;
- typedef typename Lhs::InnerIterator LhsInnerIterator;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
{
+ typename evaluator<Lhs>::type lhsEval(lhs);
for(Index j=0; j<lhs.outerSize(); ++j)
{
typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
- for(LhsInnerIterator it(lhs,j); it ;++it)
+ for(LhsInnerIterator it(lhsEval,j); it ;++it)
res.row(it.index()) += (alpha*it.value()) * rhs_j;
}
}
@@ -266,58 +128,164 @@ inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsTy
} // end namespace internal
-template<typename Lhs, typename Rhs>
-class SparseTimeDenseProduct
- : public ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs>
-{
- public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseTimeDenseProduct)
-
- SparseTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
- {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
- {
- internal::sparse_time_dense_product(m_lhs, m_rhs, dest, alpha);
- }
+namespace internal {
- private:
- SparseTimeDenseProduct& operator=(const SparseTimeDenseProduct&);
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+ typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+ LhsNested lhsNested(lhs);
+ RhsNested rhsNested(rhs);
+
+ dst.setZero();
+ internal::sparse_time_dense_product(lhsNested, rhsNested, dst, typename Dest::Scalar(1));
+ }
};
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>
+ : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
+{};
-// dense = dense * sparse
-namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<DenseTimeSparseProduct<Lhs,Rhs> >
- : traits<ProductBase<DenseTimeSparseProduct<Lhs,Rhs>, Lhs, Rhs> >
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
{
- typedef Dense StorageKind;
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+ typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+ LhsNested lhsNested(lhs);
+ RhsNested rhsNested(rhs);
+
+ dst.setZero();
+ // transpose everything
+ Transpose<Dest> dstT(dst);
+ internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1));
+ }
};
-} // end namespace internal
-template<typename Lhs, typename Rhs>
-class DenseTimeSparseProduct
- : public ProductBase<DenseTimeSparseProduct<Lhs,Rhs>, Lhs, Rhs>
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>
+ : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
+{};
+
+template<typename LhsT, typename RhsT, bool NeedToTranspose>
+struct sparse_dense_outer_product_evaluator
{
+protected:
+ typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1;
+ typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs;
+ typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;
+
+ // if the actual left-hand side is a dense vector,
+ // then build a sparse-view so that we can seamlessly iterate over it.
+ typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
+ Lhs1, SparseView<Lhs1> >::type ActualLhs;
+ typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
+ Lhs1 const&, SparseView<Lhs1> >::type LhsArg;
+
+ typedef typename evaluator<ActualLhs>::type LhsEval;
+ typedef typename evaluator<ActualRhs>::type RhsEval;
+ typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
+ typedef typename ProdXprType::Scalar Scalar;
+ typedef typename ProdXprType::Index Index;
+
+public:
+ enum {
+ Flags = NeedToTranspose ? RowMajorBit : 0,
+ CoeffReadCost = Dynamic
+ };
+
+ class InnerIterator : public LhsIterator
+ {
public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseProduct)
-
- DenseTimeSparseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+ InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
+ : LhsIterator(xprEval.m_lhsXprImpl, 0),
+ m_outer(outer),
+ m_empty(false),
+ m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))
{}
+
+ EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
+ EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
+ EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; }
- template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
+ EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }
+
+ protected:
+ Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const
{
- Transpose<const _LhsNested> lhs_t(m_lhs);
- Transpose<const _RhsNested> rhs_t(m_rhs);
- Transpose<Dest> dest_t(dest);
- internal::sparse_time_dense_product(rhs_t, lhs_t, dest_t, alpha);
+ return rhs.coeff(outer);
+ }
+
+ Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())
+ {
+ typename RhsEval::InnerIterator it(rhs, outer);
+ if (it && it.index()==0 && it.value()!=Scalar(0))
+ return it.value();
+ m_empty = true;
+ return Scalar(0);
}
+
+ Index m_outer;
+ bool m_empty;
+ Scalar m_factor;
+ };
+
+ sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)
+ : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
+ {}
+
+ // transpose case
+ sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
+ : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
+ {}
+
+protected:
+ const LhsArg m_lhs;
+ typename evaluator<ActualLhs>::nestedType m_lhsXprImpl;
+ typename evaluator<ActualRhs>::nestedType m_rhsXprImpl;
+};
- private:
- DenseTimeSparseProduct& operator=(const DenseTimeSparseProduct&);
+// sparse * dense outer product
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>
+{
+ typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;
+
+ typedef Product<Lhs, Rhs> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+
+ explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.lhs(), xpr.rhs())
+ {}
+
};
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>
+{
+ typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;
+
+ typedef Product<Lhs, Rhs> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+
+ explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.lhs(), xpr.rhs())
+ {}
+
+};
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_SPARSEDENSEPRODUCT_H
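
The row-major kernel above accumulates each result coefficient as a dot product between a sparse row and a dense column. Stripped of the evaluator plumbing, the loop nest amounts to the following CSR-style sketch:

#include <cstdio>
#include <vector>

// y = alpha * A * x for a CSR matrix A: same loop structure as the
// row-major sparse_time_dense_product_impl above.
int main() {
    // 2x3 matrix [[1,0,2],[0,3,0]] in CSR form.
    std::vector<int>    outer = {0, 2, 3};       // row start offsets
    std::vector<int>    inner = {0, 2, 1};       // column indices
    std::vector<double> vals  = {1.0, 2.0, 3.0};
    std::vector<double> x     = {1.0, 1.0, 1.0};
    double alpha = 1.0;

    std::vector<double> y(2);
    for (int j = 0; j < 2; ++j) {                       // one outer vector (row) at a time
        double tmp = 0;
        for (int k = outer[j]; k < outer[j + 1]; ++k)   // the InnerIterator walk
            tmp += vals[k] * x[inner[k]];
        y[j] = alpha * tmp;
    }
    std::printf("%g %g\n", y[0], y[1]);                 // 3 3
}
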
diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h
index c056b4914..be935e9f3 100644
--- a/Eigen/src/SparseCore/SparseDiagonalProduct.h
+++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -26,173 +26,122 @@ namespace Eigen {
namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<SparseDiagonalProduct<Lhs, Rhs> >
-{
- typedef typename remove_all<Lhs>::type _Lhs;
- typedef typename remove_all<Rhs>::type _Rhs;
- typedef typename _Lhs::Scalar Scalar;
- // propagate the index type of the sparse matrix
- typedef typename conditional< is_diagonal<_Lhs>::ret,
- typename traits<Rhs>::Index,
- typename traits<Lhs>::Index>::type Index;
- typedef Sparse StorageKind;
- typedef MatrixXpr XprKind;
- enum {
- RowsAtCompileTime = _Lhs::RowsAtCompileTime,
- ColsAtCompileTime = _Rhs::ColsAtCompileTime,
-
- MaxRowsAtCompileTime = _Lhs::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = _Rhs::MaxColsAtCompileTime,
-
- SparseFlags = is_diagonal<_Lhs>::ret ? int(_Rhs::Flags) : int(_Lhs::Flags),
- Flags = (SparseFlags&RowMajorBit),
- CoeffReadCost = Dynamic
- };
+enum {
+ SDP_AsScalarProduct,
+ SDP_AsCwiseProduct
};
+
+template<typename SparseXprType, typename DiagonalCoeffType, int SDP_Tag>
+struct sparse_diagonal_product_evaluator;
-enum {SDP_IsDiagonal, SDP_IsSparseRowMajor, SDP_IsSparseColMajor};
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType, int RhsMode, int LhsMode>
-class sparse_diagonal_product_inner_iterator_selector;
-
-} // end namespace internal
-
-template<typename Lhs, typename Rhs>
-class SparseDiagonalProduct
- : public SparseMatrixBase<SparseDiagonalProduct<Lhs,Rhs> >,
- internal::no_assignment_operator
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DiagonalShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : public sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct>
{
- typedef typename Lhs::Nested LhsNested;
- typedef typename Rhs::Nested RhsNested;
-
- typedef typename internal::remove_all<LhsNested>::type _LhsNested;
- typedef typename internal::remove_all<RhsNested>::type _RhsNested;
-
- enum {
- LhsMode = internal::is_diagonal<_LhsNested>::ret ? internal::SDP_IsDiagonal
- : (_LhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor,
- RhsMode = internal::is_diagonal<_RhsNested>::ret ? internal::SDP_IsDiagonal
- : (_RhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor
- };
-
- public:
-
- EIGEN_SPARSE_PUBLIC_INTERFACE(SparseDiagonalProduct)
-
- typedef internal::sparse_diagonal_product_inner_iterator_selector
- <_LhsNested,_RhsNested,SparseDiagonalProduct,LhsMode,RhsMode> InnerIterator;
-
- // We do not want ReverseInnerIterator for diagonal-sparse products,
- // but this dummy declaration is needed to make diag * sparse * diag compile.
- class ReverseInnerIterator;
-
- EIGEN_STRONG_INLINE SparseDiagonalProduct(const Lhs& lhs, const Rhs& rhs)
- : m_lhs(lhs), m_rhs(rhs)
- {
- eigen_assert(lhs.cols() == rhs.rows() && "invalid sparse matrix * diagonal matrix product");
- }
-
- EIGEN_STRONG_INLINE Index rows() const { return Index(m_lhs.rows()); }
- EIGEN_STRONG_INLINE Index cols() const { return Index(m_rhs.cols()); }
-
- EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
- EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
-
- protected:
- LhsNested m_lhs;
- RhsNested m_rhs;
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef evaluator<XprType> type;
+ typedef evaluator<XprType> nestedType;
+ enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
+
+ typedef sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> Base;
+ explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
};
-namespace internal {
-
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseRowMajor>
- : public CwiseUnaryOp<scalar_multiple_op<typename Lhs::Scalar>,const Rhs>::InnerIterator
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, DiagonalShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : public sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct>
{
- typedef typename CwiseUnaryOp<scalar_multiple_op<typename Lhs::Scalar>,const Rhs>::InnerIterator Base;
- typedef typename Rhs::Index Index;
- public:
- inline sparse_diagonal_product_inner_iterator_selector(
- const SparseDiagonalProductType& expr, Index outer)
- : Base(expr.rhs()*(expr.lhs().diagonal().coeff(outer)), outer)
- {}
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef evaluator<XprType> type;
+ typedef evaluator<XprType> nestedType;
+ enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
+
+ typedef sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base;
+ explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {}
};
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseColMajor>
- : public CwiseBinaryOp<
- scalar_product_op<typename Lhs::Scalar>,
- const typename Rhs::ConstInnerVectorReturnType,
- const typename Lhs::DiagonalVectorType>::InnerIterator
+template<typename SparseXprType, typename DiagonalCoeffType>
+struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct>
{
- typedef typename CwiseBinaryOp<
- scalar_product_op<typename Lhs::Scalar>,
- const typename Rhs::ConstInnerVectorReturnType,
- const typename Lhs::DiagonalVectorType>::InnerIterator Base;
- typedef typename Rhs::Index Index;
- Index m_outer;
+protected:
+ typedef typename evaluator<SparseXprType>::InnerIterator SparseXprInnerIterator;
+ typedef typename SparseXprType::Scalar Scalar;
+ typedef typename SparseXprType::Index Index;
+
+public:
+ class InnerIterator : public SparseXprInnerIterator
+ {
public:
- inline sparse_diagonal_product_inner_iterator_selector(
- const SparseDiagonalProductType& expr, Index outer)
- : Base(expr.rhs().innerVector(outer) .cwiseProduct(expr.lhs().diagonal()), 0), m_outer(outer)
+ InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)
+ : SparseXprInnerIterator(xprEval.m_sparseXprImpl, outer),
+ m_coeff(xprEval.m_diagCoeffImpl.coeff(outer))
{}
- inline Index outer() const { return m_outer; }
- inline Index col() const { return m_outer; }
+ EIGEN_STRONG_INLINE Scalar value() const { return m_coeff * SparseXprInnerIterator::value(); }
+ protected:
+ typename DiagonalCoeffType::Scalar m_coeff;
+ };
+
+ sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagonalCoeffType &diagCoeff)
+ : m_sparseXprImpl(sparseXpr), m_diagCoeffImpl(diagCoeff)
+ {}
+
+protected:
+ typename evaluator<SparseXprType>::nestedType m_sparseXprImpl;
+ typename evaluator<DiagonalCoeffType>::nestedType m_diagCoeffImpl;
};
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseColMajor,SDP_IsDiagonal>
- : public CwiseUnaryOp<scalar_multiple_op<typename Rhs::Scalar>,const Lhs>::InnerIterator
-{
- typedef typename CwiseUnaryOp<scalar_multiple_op<typename Rhs::Scalar>,const Lhs>::InnerIterator Base;
- typedef typename Lhs::Index Index;
- public:
- inline sparse_diagonal_product_inner_iterator_selector(
- const SparseDiagonalProductType& expr, Index outer)
- : Base(expr.lhs()*expr.rhs().diagonal().coeff(outer), outer)
- {}
-};
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseRowMajor,SDP_IsDiagonal>
- : public CwiseBinaryOp<
- scalar_product_op<typename Rhs::Scalar>,
- const typename Lhs::ConstInnerVectorReturnType,
- const Transpose<const typename Rhs::DiagonalVectorType> >::InnerIterator
+template<typename SparseXprType, typename DiagCoeffType>
+struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct>
{
- typedef typename CwiseBinaryOp<
- scalar_product_op<typename Rhs::Scalar>,
- const typename Lhs::ConstInnerVectorReturnType,
- const Transpose<const typename Rhs::DiagonalVectorType> >::InnerIterator Base;
- typedef typename Lhs::Index Index;
- Index m_outer;
+ typedef typename SparseXprType::Scalar Scalar;
+ typedef typename SparseXprType::Index Index;
+
+ typedef CwiseBinaryOp<scalar_product_op<Scalar>,
+ const typename SparseXprType::ConstInnerVectorReturnType,
+ const DiagCoeffType> CwiseProductType;
+
+ typedef typename evaluator<CwiseProductType>::type CwiseProductEval;
+ typedef typename evaluator<CwiseProductType>::InnerIterator CwiseProductIterator;
+
+ class InnerIterator
+ {
public:
- inline sparse_diagonal_product_inner_iterator_selector(
- const SparseDiagonalProductType& expr, Index outer)
- : Base(expr.lhs().innerVector(outer) .cwiseProduct(expr.rhs().diagonal().transpose()), 0), m_outer(outer)
+ InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)
+ : m_cwiseEval(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)),
+ m_cwiseIter(m_cwiseEval, 0),
+ m_outer(outer)
{}
- inline Index outer() const { return m_outer; }
- inline Index row() const { return m_outer; }
+ inline Scalar value() const { return m_cwiseIter.value(); }
+ inline Index index() const { return m_cwiseIter.index(); }
+ inline Index outer() const { return m_outer; }
+ inline Index col() const { return SparseXprType::IsRowMajor ? m_cwiseIter.index() : m_outer; }
+ inline Index row() const { return SparseXprType::IsRowMajor ? m_outer : m_cwiseIter.index(); }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
+ { ++m_cwiseIter; return *this; }
+ inline operator bool() const { return m_cwiseIter; }
+
+ protected:
+ CwiseProductEval m_cwiseEval;
+ CwiseProductIterator m_cwiseIter;
+ Index m_outer;
+ };
+
+ sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagCoeffType &diagCoeff)
+ : m_sparseXprNested(sparseXpr), m_diagCoeffNested(diagCoeff)
+ {}
+
+protected:
+ typename nested_eval<SparseXprType,1>::type m_sparseXprNested;
+ typename nested_eval<DiagCoeffType,SparseXprType::IsRowMajor ? SparseXprType::RowsAtCompileTime
+ : SparseXprType::ColsAtCompileTime>::type m_diagCoeffNested;
};
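// The dispatch above, at a glance (D = diagonal, S = sparse):
//   D * S, S column-major -> SDP_AsCwiseProduct  (inner vector cwise-multiplied by D's diagonal)
//   D * S, S row-major    -> SDP_AsScalarProduct (inner vector scaled by diagonal().coeff(outer))
//   S * D, S column-major -> SDP_AsScalarProduct
//   S * D, S row-major    -> SDP_AsCwiseProduct  (inner vector cwise-multiplied by the transposed diagonal)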
} // end namespace internal
-// SparseMatrixBase functions
-
-template<typename Derived>
-template<typename OtherDerived>
-const SparseDiagonalProduct<Derived,OtherDerived>
-SparseMatrixBase<Derived>::operator*(const DiagonalBase<OtherDerived> &other) const
-{
- return SparseDiagonalProduct<Derived,OtherDerived>(this->derived(), other.derived());
-}
-
} // end namespace Eigen
#endif // EIGEN_SPARSE_DIAGONAL_PRODUCT_H
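// A minimal usage sketch of the sparse * diagonal products wired up above.
// The sizes and entries are illustrative assumptions, not taken from the patch.
#include <Eigen/SparseCore>
using namespace Eigen;

void sparseDiagonalProductSketch()
{
  SparseMatrix<double> A(3,3);               // column-major by default
  A.insert(0,0) = 1.0;
  A.insert(2,1) = 2.0;
  A.makeCompressed();

  VectorXd d(3);
  d << 2.0, 3.0, 4.0;

  SparseMatrix<double> AD = A * d.asDiagonal(); // column j scaled by d(j): SDP_AsScalarProduct path here
  SparseMatrix<double> DA = d.asDiagonal() * A; // row i scaled by d(i): SDP_AsCwiseProduct path here
}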
diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h
index db39c9aec..b10c8058f 100644
--- a/Eigen/src/SparseCore/SparseDot.h
+++ b/Eigen/src/SparseCore/SparseDot.h
@@ -26,7 +26,8 @@ SparseMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
eigen_assert(size() == other.size());
  eigen_assert(other.size()>0 && "you are using an uninitialized vector");
- typename Derived::InnerIterator i(derived(),0);
+ typename internal::evaluator<Derived>::type thisEval(derived());
+ typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);
Scalar res(0);
while (i)
{
@@ -49,16 +50,12 @@ SparseMatrixBase<Derived>::dot(const SparseMatrixBase<OtherDerived>& other) cons
eigen_assert(size() == other.size());
- typedef typename Derived::Nested Nested;
- typedef typename OtherDerived::Nested OtherNested;
- typedef typename internal::remove_all<Nested>::type NestedCleaned;
- typedef typename internal::remove_all<OtherNested>::type OtherNestedCleaned;
+ typename internal::evaluator<Derived>::type thisEval(derived());
+ typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);
+
+ typename internal::evaluator<OtherDerived>::type otherEval(other.derived());
+ typename internal::evaluator<OtherDerived>::InnerIterator j(otherEval, 0);
- Nested nthis(derived());
- OtherNested nother(other.derived());
-
- typename NestedCleaned::InnerIterator i(nthis,0);
- typename OtherNestedCleaned::InnerIterator j(nother,0);
Scalar res(0);
while (i && j)
{
diff --git a/Eigen/src/SparseCore/SparseFuzzy.h b/Eigen/src/SparseCore/SparseFuzzy.h
index 45f36e9eb..7d47eb94d 100644
--- a/Eigen/src/SparseCore/SparseFuzzy.h
+++ b/Eigen/src/SparseCore/SparseFuzzy.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -10,17 +10,20 @@
#ifndef EIGEN_SPARSE_FUZZY_H
#define EIGEN_SPARSE_FUZZY_H
-// template<typename Derived>
-// template<typename OtherDerived>
-// bool SparseMatrixBase<Derived>::isApprox(
-// const OtherDerived& other,
-// typename NumTraits<Scalar>::Real prec
-// ) const
-// {
-// const typename internal::nested<Derived,2>::type nested(derived());
-// const typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
-// return (nested - otherNested).cwise().abs2().sum()
-// <= prec * prec * (std::min)(nested.cwise().abs2().sum(), otherNested.cwise().abs2().sum());
-// }
+namespace Eigen {
+
+template<typename Derived>
+template<typename OtherDerived>
+bool SparseMatrixBase<Derived>::isApprox(const SparseMatrixBase<OtherDerived>& other, const RealScalar &prec) const
+{
+ const typename internal::nested_eval<Derived,2,PlainObject>::type actualA(derived());
+ typename internal::conditional<bool(IsRowMajor)==bool(OtherDerived::IsRowMajor),
+ const typename internal::nested_eval<OtherDerived,2,PlainObject>::type,
+ const PlainObject>::type actualB(other.derived());
+
+ return (actualA - actualB).squaredNorm() <= prec * prec * numext::mini(actualA.squaredNorm(), actualB.squaredNorm());
+}
+
+} // end namespace Eigen
#endif // EIGEN_SPARSE_FUZZY_H
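// A brief sketch of the isApprox() defined above (values are illustrative):
// it returns true when ||A - B||^2 <= prec^2 * min(||A||^2, ||B||^2).
#include <Eigen/SparseCore>
using namespace Eigen;

bool fuzzyCompareSketch()
{
  SparseMatrix<double> A(2,2), B(2,2);
  A.insert(0,0) = 1.0;
  B.insert(0,0) = 1.0 + 1e-15;
  return A.isApprox(B);  // default prec is NumTraits<double>::dummy_precision()
}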
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 2ed2f3ebd..93677c786 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -52,13 +52,12 @@ struct traits<SparseMatrix<_Scalar, _Options, _Index> >
MaxRowsAtCompileTime = Dynamic,
MaxColsAtCompileTime = Dynamic,
Flags = _Options | NestByRefBit | LvalueBit,
- CoeffReadCost = NumTraits<Scalar>::ReadCost,
SupportedAccessPatterns = InnerRandomAccessPattern
};
};
template<typename _Scalar, int _Options, typename _Index, int DiagIndex>
-struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> >
+struct traits<Diagonal<SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> >
{
typedef SparseMatrix<_Scalar, _Options, _Index> MatrixType;
typedef typename nested<MatrixType>::type MatrixTypeNested;
@@ -74,8 +73,16 @@ struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex>
ColsAtCompileTime = 1,
MaxRowsAtCompileTime = Dynamic,
MaxColsAtCompileTime = 1,
- Flags = 0,
- CoeffReadCost = _MatrixTypeNested::CoeffReadCost*10
+ Flags = LvalueBit
+ };
+};
+
+template<typename _Scalar, int _Options, typename _Index, int DiagIndex>
+struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> >
+ : public traits<Diagonal<SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> >
+{
+ enum {
+ Flags = 0
};
};
@@ -91,6 +98,10 @@ class SparseMatrix
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=)
typedef MappedSparseMatrix<Scalar,Flags> Map;
+ typedef Diagonal<SparseMatrix> DiagonalReturnType;
+ typedef Diagonal<const SparseMatrix> ConstDiagonalReturnType;
+
+
using Base::IsRowMajor;
typedef internal::CompressedStorage<Scalar,Index> Storage;
enum {
@@ -274,7 +285,7 @@ class SparseMatrix
}
template<class SizesType>
inline void reserve(const SizesType& reserveSizes, const typename SizesType::Scalar& enableif =
- #if (!defined(_MSC_VER)) || (_MSC_VER>=1500) // MSVC 2005 fails to compile with this typename
+ #if (!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1500) // MSVC 2005 fails to compile with this typename
typename
#endif
SizesType::Scalar())
@@ -622,8 +633,14 @@ class SparseMatrix
m_data.resize(size);
}
- /** \returns a const expression of the diagonal coefficients */
- const Diagonal<const SparseMatrix> diagonal() const { return *this; }
+ /** \returns a const expression of the diagonal coefficients. */
+ const ConstDiagonalReturnType diagonal() const { return ConstDiagonalReturnType(*this); }
+
+ /** \returns a read-write expression of the diagonal coefficients.
+ * \warning If the diagonal entries are written, then all diagonal
+ * entries \b must already exist, otherwise an assertion will be raised.
+ */
+ DiagonalReturnType diagonal() { return DiagonalReturnType(*this); }
/** Default constructor yielding an empty \c 0 \c x \c 0 matrix */
inline SparseMatrix()
@@ -649,7 +666,9 @@ class SparseMatrix
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
check_template_parameters();
- *this = other.derived();
+ const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit);
+ if (needToTranspose) *this = other.derived();
+ else internal::call_assignment_no_alias(*this, other.derived());
}
/** Constructs a sparse matrix from the sparse selfadjoint view \a other */
@@ -658,7 +677,7 @@ class SparseMatrix
: m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)
{
check_template_parameters();
- *this = other;
+ Base::operator=(other);
}
/** Copy constructor (it performs a deep copy) */
@@ -722,22 +741,11 @@ class SparseMatrix
return *this;
}
- #ifndef EIGEN_PARSED_BY_DOXYGEN
- template<typename Lhs, typename Rhs>
- inline SparseMatrix& operator=(const SparseSparseProduct<Lhs,Rhs>& product)
- { return Base::operator=(product); }
-
- template<typename OtherDerived>
- inline SparseMatrix& operator=(const ReturnByValue<OtherDerived>& other)
- {
- initAssignment(other);
- return Base::operator=(other.derived());
- }
-
+#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
inline SparseMatrix& operator=(const EigenBase<OtherDerived>& other)
{ return Base::operator=(other.derived()); }
- #endif
+#endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);
@@ -898,6 +906,11 @@ class SparseMatrix<Scalar,_Options,_Index>::InnerIterator
const Index m_outer;
Index m_id;
Index m_end;
+ private:
+ // If you get here, then you're not using the right InnerIterator type, e.g.:
+ // SparseMatrix<double,RowMajor> A;
+ // SparseMatrix<double>::InnerIterator it(A,0);
+ template<typename T> InnerIterator(const SparseMatrixBase<T>&,Index outer);
};
template<typename Scalar, int _Options, typename _Index>
@@ -1061,17 +1074,19 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
- const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+
+ const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit);
if (needToTranspose)
{
// two passes algorithm:
// 1 - compute the number of coeffs per dest inner vector
// 2 - do the actual copy/eval
// Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
- typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
+ typedef typename internal::nested_eval<OtherDerived,2,typename internal::plain_matrix_type<OtherDerived>::type >::type OtherCopy;
typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
+ typedef internal::evaluator<_OtherCopy> OtherCopyEval;
OtherCopy otherCopy(other.derived());
+ OtherCopyEval otherCopyEval(otherCopy);
SparseMatrix dest(other.rows(),other.cols());
Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
@@ -1079,7 +1094,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
// pass 1
// FIXME the above copy could be merged with that pass
for (Index j=0; j<otherCopy.outerSize(); ++j)
- for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+ for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)
++dest.m_outerIndex[it.index()];
// prefix sum
@@ -1098,7 +1113,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
// pass 2
for (Index j=0; j<otherCopy.outerSize(); ++j)
{
- for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+ for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)
{
Index pos = positions[it.index()]++;
dest.m_data.index(pos) = j;
@@ -1111,7 +1126,9 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
else
{
if(other.isRValue())
+ {
initAssignment(other.derived());
+ }
// there is no special optimization
return Base::operator=(other.derived());
}
@@ -1256,6 +1273,53 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse
return (m_data.value(p) = 0);
}
+namespace internal {
+
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<SparseMatrix<_Scalar,_Options,_Index> >
+ : evaluator_base<SparseMatrix<_Scalar,_Options,_Index> >
+{
+ typedef _Scalar Scalar;
+ typedef _Index Index;
+ typedef SparseMatrix<_Scalar,_Options,_Index> SparseMatrixType;
+ typedef typename SparseMatrixType::InnerIterator InnerIterator;
+ typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
+
+ enum {
+ CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+ Flags = SparseMatrixType::Flags
+ };
+
+ evaluator() : m_matrix(0) {}
+ explicit evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {}
+
+ operator SparseMatrixType&() { return m_matrix->const_cast_derived(); }
+ operator const SparseMatrixType&() const { return *m_matrix; }
+
+ typedef typename DenseCoeffsBase<SparseMatrixType,ReadOnlyAccessors>::CoeffReturnType CoeffReturnType;
+ Scalar coeff(Index row, Index col) const
+ { return m_matrix->coeff(row,col); }
+
+ Scalar& coeffRef(Index row, Index col)
+ {
+ eigen_internal_assert(row>=0 && row<m_matrix->rows() && col>=0 && col<m_matrix->cols());
+
+ const Index outer = SparseMatrixType::IsRowMajor ? row : col;
+ const Index inner = SparseMatrixType::IsRowMajor ? col : row;
+
+ Index start = m_matrix->outerIndexPtr()[outer];
+ Index end = m_matrix->isCompressed() ? m_matrix->outerIndexPtr()[outer+1] : m_matrix->outerIndexPtr()[outer] + m_matrix->innerNonZeroPtr()[outer];
+ eigen_assert(end>start && "you are using a non-finalized sparse matrix or the written coefficient does not exist");
+ const Index p = m_matrix->data().searchLowerIndex(start,end-1,inner);
+ eigen_assert((p<end) && (m_matrix->data().index(p)==inner) && "written coefficient does not exist");
+ return m_matrix->const_cast_derived().data().value(p);
+ }
+
+ const SparseMatrixType *m_matrix;
+};
+
+}
+
} // end namespace Eigen
#endif // EIGEN_SPARSEMATRIX_H
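// Sketch of the diagonal() accessors added above; the entries are assumptions.
// Reading always works; writing requires every diagonal entry to be
// structurally present, otherwise the assertion in coeffRef fires.
#include <Eigen/SparseCore>
using namespace Eigen;

double diagonalSketch()
{
  SparseMatrix<double> M(3,3);
  M.insert(0,0) = 1.0; M.insert(1,1) = 2.0; M.insert(2,2) = 3.0;
  M.makeCompressed();

  VectorXd d = M.diagonal();                 // const read, always fine
  M.diagonal() = VectorXd::Constant(3, 5.0); // ok here: all three entries exist

  double trace = 0;                          // plain InnerIterator traversal
  for (int j = 0; j < M.outerSize(); ++j)
    for (SparseMatrix<double>::InnerIterator it(M, j); it; ++it)
      trace += it.value();                   // only diagonal entries are stored
  return trace + d.sum();
}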
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index fb5025049..04baabe4f 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -39,11 +39,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
typedef EigenBase<Derived> Base;
template<typename OtherDerived>
- Derived& operator=(const EigenBase<OtherDerived> &other)
- {
- other.derived().evalTo(derived());
- return derived();
- }
+ Derived& operator=(const EigenBase<OtherDerived> &other);
enum {
@@ -83,11 +79,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
* constructed from this one. See the \ref flags "list of flags".
*/
- CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
- /**< This is a rough measure of how expensive it is to read one coefficient from
- * this expression.
- */
-
IsRowMajor = Flags&RowMajorBit ? 1 : 0,
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
@@ -103,11 +94,13 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, Eigen::Transpose<const Derived> >,
Transpose<const Derived>
>::type AdjointReturnType;
+ typedef Transpose<Derived> TransposeReturnType;
+ template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
+ typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
-
+ // FIXME storage order does not match evaluator storage order
typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, Index> PlainObject;
-
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is the "real scalar" type; if the \a Scalar type is already real numbers
* (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If
@@ -124,6 +117,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Matrix<Scalar,Dynamic,Dynamic> > ConstantReturnType;
+ /** type of the equivalent dense matrix */
+ typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
/** type of the equivalent square matrix */
typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
@@ -175,93 +170,23 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
template<typename OtherDerived>
- Derived& operator=(const ReturnByValue<OtherDerived>& other)
- {
- other.evalTo(derived());
- return derived();
- }
-
+ Derived& operator=(const ReturnByValue<OtherDerived>& other);
template<typename OtherDerived>
- inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other)
- {
- return assign(other.derived());
- }
+ inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other);
- inline Derived& operator=(const Derived& other)
- {
-// if (other.isRValue())
-// derived().swap(other.const_cast_derived());
-// else
- return assign(other.derived());
- }
+ inline Derived& operator=(const Derived& other);
protected:
template<typename OtherDerived>
- inline Derived& assign(const OtherDerived& other)
- {
- const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
- const Index outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? Index(other.rows()) : Index(other.cols());
- if ((!transpose) && other.isRValue())
- {
- // eval without temporary
- derived().resize(Index(other.rows()), Index(other.cols()));
- derived().setZero();
- derived().reserve((std::max)(this->rows(),this->cols())*2);
- for (Index j=0; j<outerSize; ++j)
- {
- derived().startVec(j);
- for (typename OtherDerived::InnerIterator it(other, typename OtherDerived::Index(j)); it; ++it)
- {
- Scalar v = it.value();
- derived().insertBackByOuterInner(j,Index(it.index())) = v;
- }
- }
- derived().finalize();
- }
- else
- {
- assignGeneric(other);
- }
- return derived();
- }
+ inline Derived& assign(const OtherDerived& other);
template<typename OtherDerived>
- inline void assignGeneric(const OtherDerived& other)
- {
- //const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
- eigen_assert(( ((internal::traits<Derived>::SupportedAccessPatterns&OuterRandomAccessPattern)==OuterRandomAccessPattern) ||
- (!((Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit)))) &&
- "the transpose operation is supposed to be handled in SparseMatrix::operator=");
-
- enum { Flip = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit) };
-
- const Index outerSize = Index(other.outerSize());
- //typedef typename internal::conditional<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::type TempType;
- // thanks to shallow copies, we always eval to a tempary
- Derived temp(Index(other.rows()), Index(other.cols()));
-
- temp.reserve((std::max)(this->rows(),this->cols())*2);
- for (Index j=0; j<outerSize; ++j)
- {
- temp.startVec(j);
- for (typename OtherDerived::InnerIterator it(other.derived(), typename OtherDerived::Index(j)); it; ++it)
- {
- Scalar v = it.value();
- temp.insertBackByOuterInner(Flip?Index(it.index()):j,Flip?j:Index(it.index())) = v;
- }
- }
- temp.finalize();
-
- derived() = temp.markAsRValue();
- }
+ inline void assignGeneric(const OtherDerived& other);
public:
- template<typename Lhs, typename Rhs>
- inline Derived& operator=(const SparseSparseProduct<Lhs,Rhs>& product);
-
friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)
{
typedef typename Derived::Nested Nested;
@@ -333,33 +258,34 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE
cwiseProduct(const MatrixBase<OtherDerived> &other) const;
- // sparse * sparse
- template<typename OtherDerived>
- const typename SparseSparseProductReturnType<Derived,OtherDerived>::Type
- operator*(const SparseMatrixBase<OtherDerived> &other) const;
-
// sparse * diagonal
template<typename OtherDerived>
- const SparseDiagonalProduct<Derived,OtherDerived>
- operator*(const DiagonalBase<OtherDerived> &other) const;
+ const Product<Derived,OtherDerived>
+ operator*(const DiagonalBase<OtherDerived> &other) const
+ { return Product<Derived,OtherDerived>(derived(), other.derived()); }
// diagonal * sparse
template<typename OtherDerived> friend
- const SparseDiagonalProduct<OtherDerived,Derived>
+ const Product<OtherDerived,Derived>
operator*(const DiagonalBase<OtherDerived> &lhs, const SparseMatrixBase& rhs)
- { return SparseDiagonalProduct<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }
-
- /** dense * sparse (return a dense object unless it is an outer product) */
- template<typename OtherDerived> friend
- const typename DenseSparseProductReturnType<OtherDerived,Derived>::Type
- operator*(const MatrixBase<OtherDerived>& lhs, const Derived& rhs)
- { return typename DenseSparseProductReturnType<OtherDerived,Derived>::Type(lhs.derived(),rhs); }
-
- /** sparse * dense (returns a dense object unless it is an outer product) */
+ { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }
+
+ // sparse * sparse
template<typename OtherDerived>
- const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
+ const Product<Derived,OtherDerived>
+ operator*(const SparseMatrixBase<OtherDerived> &other) const;
+
+ // sparse * dense
+ template<typename OtherDerived>
+ const Product<Derived,OtherDerived>
operator*(const MatrixBase<OtherDerived> &other) const
- { return typename SparseDenseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); }
+ { return Product<Derived,OtherDerived>(derived(), other.derived()); }
+
+ // dense * sparse
+ template<typename OtherDerived> friend
+ const Product<OtherDerived,Derived>
+ operator*(const MatrixBase<OtherDerived> &lhs, const SparseMatrixBase& rhs)
+ { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }
/** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */
SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
@@ -371,9 +297,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
Derived& operator*=(const SparseMatrixBase<OtherDerived>& other);
template<int Mode>
- inline const SparseTriangularView<Derived, Mode> triangularView() const;
+ inline const TriangularView<const Derived, Mode> triangularView() const;
- template<unsigned int UpLo> inline const SparseSelfAdjointView<Derived, UpLo> selfadjointView() const;
+ template<unsigned int UpLo> inline const SparseSelfAdjointView<const Derived, UpLo> selfadjointView() const;
template<unsigned int UpLo> inline SparseSelfAdjointView<Derived, UpLo> selfadjointView();
template<typename OtherDerived> Scalar dot(const MatrixBase<OtherDerived>& other) const;
@@ -382,9 +308,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
RealScalar norm() const;
RealScalar blueNorm() const;
- Transpose<Derived> transpose() { return derived(); }
- const Transpose<const Derived> transpose() const { return derived(); }
- const AdjointReturnType adjoint() const { return transpose(); }
+ TransposeReturnType transpose() { return TransposeReturnType(derived()); }
+ const ConstTransposeReturnType transpose() const { return ConstTransposeReturnType(derived()); }
+ const AdjointReturnType adjoint() const { return AdjointReturnType(transpose()); }
// inner-vector
typedef Block<Derived,IsRowMajor?1:Dynamic,IsRowMajor?Dynamic:1,true> InnerVectorReturnType;
@@ -396,25 +322,14 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
Block<Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize);
const Block<const Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize) const;
- /** \internal use operator= */
- template<typename DenseDerived>
- void evalTo(MatrixBase<DenseDerived>& dst) const
- {
- dst.setZero();
- for (Index j=0; j<outerSize(); ++j)
- for (typename Derived::InnerIterator i(derived(),typename Derived::Index(j)); i; ++i)
- dst.coeffRef(i.row(),i.col()) = i.value();
- }
-
- Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> toDense() const
+ DenseMatrixType toDense() const
{
- return derived();
+ return DenseMatrixType(derived());
}
template<typename OtherDerived>
bool isApprox(const SparseMatrixBase<OtherDerived>& other,
- const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
- { return toDense().isApprox(other.toDense(),prec); }
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other,
@@ -430,6 +345,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
{ return typename internal::eval<Derived>::type(derived()); }
Scalar sum() const;
+
+ inline const SparseView<Derived>
+ pruned(const Scalar& reference = Scalar(0), const RealScalar& epsilon = NumTraits<Scalar>::dummy_precision()) const;
protected:
diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h
index b85be93f6..21411f232 100644
--- a/Eigen/src/SparseCore/SparsePermutation.h
+++ b/Eigen/src/SparseCore/SparsePermutation.h
@@ -61,7 +61,7 @@ struct permut_sparsematrix_product_retval
for(Index j=0; j<m_matrix.outerSize(); ++j)
{
Index jp = m_permutation.indices().coeff(j);
- sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = m_matrix.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).size();
+ sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = m_matrix.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).nonZeros();
}
tmp.reserve(sizes);
for(Index j=0; j<m_matrix.outerSize(); ++j)
@@ -103,44 +103,133 @@ struct permut_sparsematrix_product_retval
}
+namespace internal {
+
+template <int ProductTag> struct product_promote_storage_type<Sparse, PermutationStorage, ProductTag> { typedef Sparse ret; };
+template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Sparse, ProductTag> { typedef Sparse ret; };
+
+// TODO, the following need cleaning, this is just a copy-paste of the dense case
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ permut_sparsematrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs);
+ pmpr.evalTo(dst);
+ }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ permut_sparsematrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs);
+ pmpr.evalTo(dst);
+ }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, SparseShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
+ {
+ permut_sparsematrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs);
+ pmpr.evalTo(dst);
+ }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Transpose<Rhs>, SparseShape, PermutationShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
+ {
+ permut_sparsematrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs);
+ pmpr.evalTo(dst);
+ }
+};
+
+// TODO, the following two overloads are only needed to define the right temporary type through
+// typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType
+// while it should be correctly handled by traits<Product<> >::PlainObject
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, PermutationShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : public evaluator<typename traits<permut_sparsematrix_product_retval<Lhs,Rhs,OnTheRight,false> >::ReturnType>::type
+{
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef typename traits<permut_sparsematrix_product_retval<Lhs,Rhs,OnTheRight,false> >::ReturnType PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ explicit product_evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, PermutationShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar>
+ : public evaluator<typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType>::type
+{
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ explicit product_evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+} // end namespace internal
/** \returns the matrix with the permutation applied to the columns
*/
template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, false>
+inline const Product<SparseDerived, PermDerived>
operator*(const SparseMatrixBase<SparseDerived>& matrix, const PermutationBase<PermDerived>& perm)
-{
- return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, false>(perm, matrix.derived());
-}
+{ return Product<SparseDerived, PermDerived>(matrix.derived(), perm.derived()); }
/** \returns the matrix with the permutation applied to the rows
*/
template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, false>
+inline const Product<PermDerived, SparseDerived>
operator*( const PermutationBase<PermDerived>& perm, const SparseMatrixBase<SparseDerived>& matrix)
-{
- return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, false>(perm, matrix.derived());
-}
-
+{ return Product<PermDerived, SparseDerived>(perm.derived(), matrix.derived()); }
+// TODO, the following specializations should not be needed as Transpose<Permutation*> should be a PermutationBase.
/** \returns the matrix with the inverse permutation applied to the columns.
*/
template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, true>
+inline const Product<SparseDerived, Transpose<PermutationBase<PermDerived> > >
operator*(const SparseMatrixBase<SparseDerived>& matrix, const Transpose<PermutationBase<PermDerived> >& tperm)
{
- return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, true>(tperm.nestedPermutation(), matrix.derived());
+ return Product<SparseDerived, Transpose<PermutationBase<PermDerived> > >(matrix.derived(), tperm);
}
/** \returns the matrix with the inverse permutation applied to the rows.
*/
template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, true>
+inline const Product<Transpose<PermutationBase<PermDerived> >, SparseDerived>
operator*(const Transpose<PermutationBase<PermDerived> >& tperm, const SparseMatrixBase<SparseDerived>& matrix)
{
- return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, true>(tperm.nestedPermutation(), matrix.derived());
+ return Product<Transpose<PermutationBase<PermDerived> >, SparseDerived>(tperm, matrix.derived());
}
} // end namespace Eigen
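// Usage sketch for the permutation * sparse overloads above; the permutation
// and matrix contents are illustrative assumptions.
#include <Eigen/SparseCore>
using namespace Eigen;

void permutationSketch()
{
  PermutationMatrix<Dynamic,Dynamic> P(3);
  P.setIdentity();
  P.applyTranspositionOnTheLeft(0, 1);   // swap indices 0 and 1

  SparseMatrix<double> A(3,3);
  A.insert(0,0) = 1.0; A.insert(2,2) = 2.0;
  A.makeCompressed();

  SparseMatrix<double> rows = P * A;              // permutes the rows of A
  SparseMatrix<double> cols = A * P;              // permutes the columns of A
  SparseMatrix<double> back = P.inverse() * rows; // Transpose<PermutationBase<...> > overload
}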
diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h
index cf7663070..c62386ed1 100644
--- a/Eigen/src/SparseCore/SparseProduct.h
+++ b/Eigen/src/SparseCore/SparseProduct.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,158 +12,6 @@
namespace Eigen {
-template<typename Lhs, typename Rhs>
-struct SparseSparseProductReturnType
-{
- typedef typename internal::traits<Lhs>::Scalar Scalar;
- typedef typename internal::traits<Lhs>::Index Index;
- enum {
- LhsRowMajor = internal::traits<Lhs>::Flags & RowMajorBit,
- RhsRowMajor = internal::traits<Rhs>::Flags & RowMajorBit,
- TransposeRhs = (!LhsRowMajor) && RhsRowMajor,
- TransposeLhs = LhsRowMajor && (!RhsRowMajor)
- };
-
- typedef typename internal::conditional<TransposeLhs,
- SparseMatrix<Scalar,0,Index>,
- typename internal::nested<Lhs,Rhs::RowsAtCompileTime>::type>::type LhsNested;
-
- typedef typename internal::conditional<TransposeRhs,
- SparseMatrix<Scalar,0,Index>,
- typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type>::type RhsNested;
-
- typedef SparseSparseProduct<LhsNested, RhsNested> Type;
-};
-
-namespace internal {
-template<typename LhsNested, typename RhsNested>
-struct traits<SparseSparseProduct<LhsNested, RhsNested> >
-{
- typedef MatrixXpr XprKind;
- // clean the nested types:
- typedef typename remove_all<LhsNested>::type _LhsNested;
- typedef typename remove_all<RhsNested>::type _RhsNested;
- typedef typename _LhsNested::Scalar Scalar;
- typedef typename promote_index_type<typename traits<_LhsNested>::Index,
- typename traits<_RhsNested>::Index>::type Index;
-
- enum {
- LhsCoeffReadCost = _LhsNested::CoeffReadCost,
- RhsCoeffReadCost = _RhsNested::CoeffReadCost,
- LhsFlags = _LhsNested::Flags,
- RhsFlags = _RhsNested::Flags,
-
- RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
- ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
- MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
-
- InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
-
- EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit),
-
- RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit),
-
- Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
- | EvalBeforeAssigningBit
- | EvalBeforeNestingBit,
-
- CoeffReadCost = Dynamic
- };
-
- typedef Sparse StorageKind;
-};
-
-} // end namespace internal
-
-template<typename LhsNested, typename RhsNested>
-class SparseSparseProduct : internal::no_assignment_operator,
- public SparseMatrixBase<SparseSparseProduct<LhsNested, RhsNested> >
-{
- public:
-
- typedef SparseMatrixBase<SparseSparseProduct> Base;
- EIGEN_DENSE_PUBLIC_INTERFACE(SparseSparseProduct)
-
- private:
-
- typedef typename internal::traits<SparseSparseProduct>::_LhsNested _LhsNested;
- typedef typename internal::traits<SparseSparseProduct>::_RhsNested _RhsNested;
-
- public:
-
- template<typename Lhs, typename Rhs>
- EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs)
- : m_lhs(lhs), m_rhs(rhs), m_tolerance(0), m_conservative(true)
- {
- init();
- }
-
- template<typename Lhs, typename Rhs>
- EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs, const RealScalar& tolerance)
- : m_lhs(lhs), m_rhs(rhs), m_tolerance(tolerance), m_conservative(false)
- {
- init();
- }
-
- SparseSparseProduct pruned(const Scalar& reference = 0, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) const
- {
- using std::abs;
- return SparseSparseProduct(m_lhs,m_rhs,abs(reference)*epsilon);
- }
-
- template<typename Dest>
- void evalTo(Dest& result) const
- {
- if(m_conservative)
- internal::conservative_sparse_sparse_product_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result);
- else
- internal::sparse_sparse_product_with_pruning_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result,m_tolerance);
- }
-
- EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
- EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
-
- EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
- EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
-
- protected:
- void init()
- {
- eigen_assert(m_lhs.cols() == m_rhs.rows());
-
- enum {
- ProductIsValid = _LhsNested::ColsAtCompileTime==Dynamic
- || _RhsNested::RowsAtCompileTime==Dynamic
- || int(_LhsNested::ColsAtCompileTime)==int(_RhsNested::RowsAtCompileTime),
- AreVectors = _LhsNested::IsVectorAtCompileTime && _RhsNested::IsVectorAtCompileTime,
- SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(_LhsNested,_RhsNested)
- };
- // note to the lost user:
- // * for a dot product use: v1.dot(v2)
- // * for a coeff-wise product use: v1.cwise()*v2
- EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
- INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
- EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
- INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
- EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
- }
-
- LhsNested m_lhs;
- RhsNested m_rhs;
- RealScalar m_tolerance;
- bool m_conservative;
-};
-
-// sparse = sparse * sparse
-template<typename Derived>
-template<typename Lhs, typename Rhs>
-inline Derived& SparseMatrixBase<Derived>::operator=(const SparseSparseProduct<Lhs,Rhs>& product)
-{
- product.evalTo(derived());
- return derived();
-}
-
/** \returns an expression of the product of two sparse matrices.
 * By default, a conservative product preserving the symbolic non-zeros is performed.
 * Automatic pruning of small values can be achieved by calling the pruned() function
@@ -177,12 +25,74 @@ inline Derived& SparseMatrixBase<Derived>::operator=(const SparseSparseProduct<L
* */
template<typename Derived>
template<typename OtherDerived>
-inline const typename SparseSparseProductReturnType<Derived,OtherDerived>::Type
+inline const Product<Derived,OtherDerived>
SparseMatrixBase<Derived>::operator*(const SparseMatrixBase<OtherDerived> &other) const
{
- return typename SparseSparseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+ return Product<Derived,OtherDerived>(derived(), other.derived());
}
+namespace internal {
+
+// sparse * sparse
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+ typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+ LhsNested lhsNested(lhs);
+ RhsNested rhsNested(rhs);
+ internal::conservative_sparse_sparse_product_selector<typename remove_all<LhsNested>::type,
+ typename remove_all<RhsNested>::type, Dest>::run(lhsNested,rhsNested,dst);
+ }
+};
+
+// sparse * sparse-triangular
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseShape, SparseTriangularShape, ProductType>
+ : public generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>
+{};
+
+// sparse-triangular * sparse
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, SparseShape, ProductType>
+ : public generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>
+{};
+
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<SparseView<Product<Lhs, Rhs, Options> > >
+ : public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type
+{
+ typedef SparseView<Product<Lhs, Rhs, Options> > XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ explicit evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ using std::abs;
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+ typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+ LhsNested lhsNested(xpr.nestedExpression().lhs());
+ RhsNested rhsNested(xpr.nestedExpression().rhs());
+
+ internal::sparse_sparse_product_with_pruning_selector<typename remove_all<LhsNested>::type,
+ typename remove_all<RhsNested>::type, PlainObject>::run(lhsNested,rhsNested,m_result,
+ abs(xpr.reference())*xpr.epsilon());
+ }
+
+protected:
+ PlainObject m_result;
+};
+
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_SPARSEPRODUCT_H
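// Sketch of the sparse * sparse entry points above; A and B are assumed to be
// conformable operands.
#include <Eigen/SparseCore>
using namespace Eigen;

void sparseProductSketch(const SparseMatrix<double>& A, const SparseMatrix<double>& B)
{
  SparseMatrix<double> C = A * B;            // conservative product, keeps symbolic non-zeros
  SparseMatrix<double> D = (A * B).pruned(); // prunes numerical zeros via the SparseView evaluator
  SparseMatrix<double> E = (A * B).pruned(1.0, 1e-12); // drops |c_ij| <= |1.0| * 1e-12
}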
diff --git a/Eigen/src/SparseCore/SparseRedux.h b/Eigen/src/SparseCore/SparseRedux.h
index f3da93a71..763f2296b 100644
--- a/Eigen/src/SparseCore/SparseRedux.h
+++ b/Eigen/src/SparseCore/SparseRedux.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -18,8 +18,9 @@ SparseMatrixBase<Derived>::sum() const
{
  eigen_assert(rows()>0 && cols()>0 && "you are using an uninitialized matrix");
Scalar res(0);
+ typename internal::evaluator<Derived>::type thisEval(derived());
for (Index j=0; j<outerSize(); ++j)
- for (typename Derived::InnerIterator iter(derived(),j); iter; ++iter)
+ for (typename internal::evaluator<Derived>::InnerIterator iter(thisEval,j); iter; ++iter)
res += iter.value();
return res;
}
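// The evaluator-based traversal used by sum() above, as a generic pattern
// (Xpr stands for any sparse expression; acc and xpr are illustrative names):
//
//   typename internal::evaluator<Xpr>::type eval(xpr);
//   for (Index j = 0; j < xpr.outerSize(); ++j)
//     for (typename internal::evaluator<Xpr>::InnerIterator it(eval, j); it; ++it)
//       acc += it.value();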
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index 56c922929..5da7d2bef 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -11,14 +11,14 @@
#define EIGEN_SPARSE_SELFADJOINTVIEW_H
namespace Eigen {
-
+
/** \ingroup SparseCore_Module
* \class SparseSelfAdjointView
*
* \brief Pseudo expression to manipulate a triangular sparse matrix as a selfadjoint matrix.
*
 * \param MatrixType the type of the sparse matrix storing the coefficients
- * \param UpLo can be either \c #Lower or \c #Upper
+ * \param Mode can be either \c #Lower or \c #Upper
*
 * This class is an expression of a selfadjoint matrix from a triangular part of a matrix
 * with given sparse storage of the coefficients. It is the return type of SparseMatrixBase::selfadjointView()
@@ -26,38 +26,34 @@ namespace Eigen {
*
* \sa SparseMatrixBase::selfadjointView()
*/
-template<typename Lhs, typename Rhs, int UpLo>
-class SparseSelfAdjointTimeDenseProduct;
-
-template<typename Lhs, typename Rhs, int UpLo>
-class DenseTimeSparseSelfAdjointProduct;
-
namespace internal {
-template<typename MatrixType, unsigned int UpLo>
-struct traits<SparseSelfAdjointView<MatrixType,UpLo> > : traits<MatrixType> {
+template<typename MatrixType, unsigned int Mode>
+struct traits<SparseSelfAdjointView<MatrixType,Mode> > : traits<MatrixType> {
};
-template<int SrcUpLo,int DstUpLo,typename MatrixType,int DestOrder>
+template<int SrcMode,int DstMode,typename MatrixType,int DestOrder>
void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm = 0);
-template<int UpLo,typename MatrixType,int DestOrder>
+template<int Mode,typename MatrixType,int DestOrder>
void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm = 0);
}
-template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
- : public EigenBase<SparseSelfAdjointView<MatrixType,UpLo> >
+template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
+ : public EigenBase<SparseSelfAdjointView<MatrixType,_Mode> >
{
public:
+
+ enum { Mode = _Mode };
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef Matrix<Index,Dynamic,1> VectorI;
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
-
- inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
+
+ explicit inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
{
      eigen_assert(rows()==cols() && "SelfAdjointView is only for square matrices");
}
@@ -75,10 +71,10 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
 * Indeed, the SparseSelfAdjointView operand is first copied into a temporary SparseMatrix before computing the product.
*/
template<typename OtherDerived>
- SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>
+ Product<SparseSelfAdjointView, OtherDerived>
operator*(const SparseMatrixBase<OtherDerived>& rhs) const
{
- return SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>(*this, rhs.derived());
+ return Product<SparseSelfAdjointView, OtherDerived>(*this, rhs.derived());
}
/** \returns an expression of the matrix product between a sparse matrix \a lhs and a sparse self-adjoint matrix \a rhs.
@@ -87,26 +83,26 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
 * Indeed, the SparseSelfAdjointView operand is first copied into a temporary SparseMatrix before computing the product.
*/
template<typename OtherDerived> friend
- SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject >
+ Product<OtherDerived, SparseSelfAdjointView>
operator*(const SparseMatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)
{
- return SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject>(lhs.derived(), rhs);
+ return Product<OtherDerived, SparseSelfAdjointView>(lhs.derived(), rhs);
}
/** Efficient sparse self-adjoint matrix times dense vector/matrix product */
template<typename OtherDerived>
- SparseSelfAdjointTimeDenseProduct<MatrixType,OtherDerived,UpLo>
+ Product<SparseSelfAdjointView,OtherDerived>
operator*(const MatrixBase<OtherDerived>& rhs) const
{
- return SparseSelfAdjointTimeDenseProduct<MatrixType,OtherDerived,UpLo>(m_matrix, rhs.derived());
+ return Product<SparseSelfAdjointView,OtherDerived>(*this, rhs.derived());
}
/** Efficient dense vector/matrix times sparse self-adjoint matrix product */
template<typename OtherDerived> friend
- DenseTimeSparseSelfAdjointProduct<OtherDerived,MatrixType,UpLo>
+ Product<OtherDerived,SparseSelfAdjointView>
operator*(const MatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)
{
- return DenseTimeSparseSelfAdjointProduct<OtherDerived,_MatrixTypeNested,UpLo>(lhs.derived(), rhs.m_matrix);
+ return Product<OtherDerived,SparseSelfAdjointView>(lhs.derived(), rhs);
}
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
@@ -123,53 +119,49 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
  /** \internal triggered by sparse_matrix = SparseSelfAdjointView; */
template<typename DestScalar,int StorageOrder> void evalTo(SparseMatrix<DestScalar,StorageOrder,Index>& _dest) const
{
- internal::permute_symm_to_fullsymm<UpLo>(m_matrix, _dest);
+ internal::permute_symm_to_fullsymm<Mode>(m_matrix, _dest);
}
template<typename DestScalar> void evalTo(DynamicSparseMatrix<DestScalar,ColMajor,Index>& _dest) const
{
// TODO directly evaluate into _dest;
SparseMatrix<DestScalar,ColMajor,Index> tmp(_dest.rows(),_dest.cols());
- internal::permute_symm_to_fullsymm<UpLo>(m_matrix, tmp);
+ internal::permute_symm_to_fullsymm<Mode>(m_matrix, tmp);
_dest = tmp;
}
/** \returns an expression of P H P^-1 */
- SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
+ // TODO implement twists in a more evaluator-friendly fashion
+ SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
{
- return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm);
+ return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm);
}
-
- template<typename SrcMatrixType,int SrcUpLo>
- SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcUpLo>& permutedMatrix)
+
+ template<typename SrcMatrixType,int SrcMode>
+ SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)
{
permutedMatrix.evalTo(*this);
return *this;
}
-
SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src)
{
PermutationMatrix<Dynamic> pnull;
return *this = src.twistedBy(pnull);
}
- template<typename SrcMatrixType,unsigned int SrcUpLo>
- SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcUpLo>& src)
+ template<typename SrcMatrixType,unsigned int SrcMode>
+ SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcMode>& src)
{
PermutationMatrix<Dynamic> pnull;
return *this = src.twistedBy(pnull);
}
-
- // const SparseLLT<PlainObject, UpLo> llt() const;
- // const SparseLDLT<PlainObject, UpLo> ldlt() const;
-
protected:
typename MatrixType::Nested m_matrix;
- mutable VectorI m_countPerRow;
- mutable VectorI m_countPerCol;
+ //mutable VectorI m_countPerRow;
+ //mutable VectorI m_countPerCol;
};
/***************************************************************************
@@ -177,33 +169,33 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
***************************************************************************/
template<typename Derived>
-template<unsigned int UpLo>
-const SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView() const
+template<unsigned int Mode>
+const SparseSelfAdjointView<const Derived, Mode> SparseMatrixBase<Derived>::selfadjointView() const
{
- return derived();
+ return SparseSelfAdjointView<const Derived, Mode>(derived());
}
template<typename Derived>
-template<unsigned int UpLo>
-SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView()
+template<unsigned int Mode>
+SparseSelfAdjointView<Derived, Mode> SparseMatrixBase<Derived>::selfadjointView()
{
- return derived();
+ return SparseSelfAdjointView<Derived, Mode>(derived());
}
/***************************************************************************
* Implementation of SparseSelfAdjointView methods
***************************************************************************/
-template<typename MatrixType, unsigned int UpLo>
+template<typename MatrixType, unsigned int Mode>
template<typename DerivedU>
-SparseSelfAdjointView<MatrixType,UpLo>&
-SparseSelfAdjointView<MatrixType,UpLo>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha)
+SparseSelfAdjointView<MatrixType,Mode>&
+SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha)
{
- SparseMatrix<Scalar,MatrixType::Flags&RowMajorBit?RowMajor:ColMajor> tmp = u * u.adjoint();
+ SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint();
if(alpha==Scalar(0))
- m_matrix.const_cast_derived() = tmp.template triangularView<UpLo>();
+ m_matrix.const_cast_derived() = tmp.template triangularView<Mode>();
else
- m_matrix.const_cast_derived() += alpha * tmp.template triangularView<UpLo>();
+ m_matrix.const_cast_derived() += alpha * tmp.template triangularView<Mode>();
return *this;
}
@@ -213,104 +205,154 @@ SparseSelfAdjointView<MatrixType,UpLo>::rankUpdate(const SparseMatrixBase<Derive
***************************************************************************/
namespace internal {
-template<typename Lhs, typename Rhs, int UpLo>
-struct traits<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo> >
- : traits<ProductBase<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> >
-{
- typedef Dense StorageKind;
-};
-}
-template<typename Lhs, typename Rhs, int UpLo>
-class SparseSelfAdjointTimeDenseProduct
- : public ProductBase<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo>, Lhs, Rhs>
+template<int Mode, typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
+inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
{
- public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseSelfAdjointTimeDenseProduct)
-
- SparseSelfAdjointTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
- {}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ EIGEN_ONLY_USED_FOR_DEBUG(alpha);
+ // TODO use alpha
+ eigen_assert(alpha==AlphaType(1) && "alpha != 1 is not implemented yet, sorry");
+
+ typedef typename evaluator<SparseLhsType>::type LhsEval;
+ typedef typename evaluator<SparseLhsType>::InnerIterator LhsIterator;
+ typedef typename SparseLhsType::Index Index;
+ typedef typename SparseLhsType::Scalar LhsScalar;
+
+ enum {
+ LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit,
+ ProcessFirstHalf =
+ ((Mode&(Upper|Lower))==(Upper|Lower))
+ || ( (Mode&Upper) && !LhsIsRowMajor)
+ || ( (Mode&Lower) && LhsIsRowMajor),
+ ProcessSecondHalf = !ProcessFirstHalf
+ };
+
+ LhsEval lhsEval(lhs);
+
+ for (Index j=0; j<lhs.outerSize(); ++j)
+ {
+ LhsIterator i(lhsEval,j);
+ if (ProcessSecondHalf)
{
- EIGEN_ONLY_USED_FOR_DEBUG(alpha);
- // TODO use alpha
- eigen_assert(alpha==Scalar(1) && "alpha != 1 is not implemented yet, sorry");
- typedef typename internal::remove_all<Lhs>::type _Lhs;
- typedef typename _Lhs::InnerIterator LhsInnerIterator;
- enum {
- LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit,
- ProcessFirstHalf =
- ((UpLo&(Upper|Lower))==(Upper|Lower))
- || ( (UpLo&Upper) && !LhsIsRowMajor)
- || ( (UpLo&Lower) && LhsIsRowMajor),
- ProcessSecondHalf = !ProcessFirstHalf
- };
- for (typename _Lhs::Index j=0; j<m_lhs.outerSize(); ++j)
+ while (i && i.index()<j) ++i;
+ if(i && i.index()==j)
{
- LhsInnerIterator i(m_lhs,j);
- if (ProcessSecondHalf)
- {
- while (i && i.index()<j) ++i;
- if(i && i.index()==j)
- {
- dest.row(j) += i.value() * m_rhs.row(j);
- ++i;
- }
- }
- for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
- {
- Index a = LhsIsRowMajor ? j : i.index();
- Index b = LhsIsRowMajor ? i.index() : j;
- typename Lhs::Scalar v = i.value();
- dest.row(a) += (v) * m_rhs.row(b);
- dest.row(b) += numext::conj(v) * m_rhs.row(a);
- }
- if (ProcessFirstHalf && i && (i.index()==j))
- dest.row(j) += i.value() * m_rhs.row(j);
+ res.row(j) += i.value() * rhs.row(j);
+ ++i;
}
}
+ for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
+ {
+ Index a = LhsIsRowMajor ? j : i.index();
+ Index b = LhsIsRowMajor ? i.index() : j;
+ LhsScalar v = i.value();
+ res.row(a) += (v) * rhs.row(b);
+ res.row(b) += numext::conj(v) * rhs.row(a);
+ }
+ if (ProcessFirstHalf && i && (i.index()==j))
+ res.row(j) += i.value() * rhs.row(j);
+ }
+}
+
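The helper above accumulates res += A*rhs while traversing only the stored triangular half of A, mirroring each off-diagonal entry through its conjugate. A user-level sketch of the equivalent computation (illustration only):

#include <Eigen/Dense>
#include <Eigen/SparseCore>
using namespace Eigen;
MatrixXd selfadjoint_times_dense(const SparseMatrix<double>& A, const MatrixXd& rhs)
{
  // Only the lower half of A is referenced; the upper half is implied
  // by symmetry, which is exactly what the routine above exploits.
  return A.selfadjointView<Lower>() * rhs;
}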
+// TODO: currently a selfadjoint expression has the form SelfAdjointView<.,.>;
+// in the future, selfadjoint-ness should be defined by the expression traits
+// such that Transpose<SelfAdjointView<.,.> > is valid (currently TriangularBase::transpose() is overloaded to make it work).
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
+{
+ typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+ typedef SparseSelfAdjointShape Shape;
+
+ static const int AssumeAliasing = 0;
+};
- private:
- SparseSelfAdjointTimeDenseProduct& operator=(const SparseSelfAdjointTimeDenseProduct&);
+template<typename LhsView, typename Rhs, int ProductType>
+struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs)
+ {
+ typedef typename LhsView::_MatrixTypeNested Lhs;
+ typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+ typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+ LhsNested lhsNested(lhsView.matrix());
+ RhsNested rhsNested(rhs);
+
+ dst.setZero();
+ internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, typename Dest::Scalar(1));
+ }
};
-namespace internal {
-template<typename Lhs, typename Rhs, int UpLo>
-struct traits<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo> >
- : traits<ProductBase<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> >
-{};
-}
+template<typename Lhs, typename RhsView, int ProductType>
+struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView)
+ {
+ typedef typename RhsView::_MatrixTypeNested Rhs;
+ typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+ typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+ LhsNested lhsNested(lhs);
+ RhsNested rhsNested(rhsView.matrix());
+
+ dst.setZero();
+ // transpose everything
+ Transpose<Dest> dstT(dst);
+ internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1));
+ }
+};
-template<typename Lhs, typename Rhs, int UpLo>
-class DenseTimeSparseSelfAdjointProduct
- : public ProductBase<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo>, Lhs, Rhs>
+// NOTE: these two overloads are needed to evaluate the sparse selfadjoint view into a full sparse matrix
+// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore
+
+template<typename LhsView, typename Rhs, int ProductTag>
+struct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, SparseSelfAdjointShape, SparseShape, typename traits<LhsView>::Scalar, typename traits<Rhs>::Scalar>
+ : public evaluator<typename Product<typename Rhs::PlainObject, Rhs, DefaultProduct>::PlainObject>::type
{
- public:
- EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseSelfAdjointProduct)
+ typedef Product<LhsView, Rhs, DefaultProduct> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
- DenseTimeSparseSelfAdjointProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
- {}
+ product_evaluator(const XprType& xpr)
+ : m_lhs(xpr.lhs()), m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ generic_product_impl<typename Rhs::PlainObject, Rhs, SparseShape, SparseShape, ProductTag>::evalTo(m_result, m_lhs, xpr.rhs());
+ }
+
+protected:
+ typename Rhs::PlainObject m_lhs;
+ PlainObject m_result;
+};
- template<typename Dest> void scaleAndAddTo(Dest& /*dest*/, const Scalar& /*alpha*/) const
- {
- // TODO
- }
+template<typename Lhs, typename RhsView, int ProductTag>
+struct product_evaluator<Product<Lhs, RhsView, DefaultProduct>, ProductTag, SparseShape, SparseSelfAdjointShape, typename traits<Lhs>::Scalar, typename traits<RhsView>::Scalar>
+ : public evaluator<typename Product<Lhs, typename Lhs::PlainObject, DefaultProduct>::PlainObject>::type
+{
+ typedef Product<Lhs, RhsView, DefaultProduct> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef typename evaluator<PlainObject>::type Base;
- private:
- DenseTimeSparseSelfAdjointProduct& operator=(const DenseTimeSparseSelfAdjointProduct&);
+ product_evaluator(const XprType& xpr)
+ : m_rhs(xpr.rhs()), m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ generic_product_impl<Lhs, typename Lhs::PlainObject, SparseShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), m_rhs);
+ }
+
+protected:
+ typename Lhs::PlainObject m_rhs;
+ PlainObject m_result;
};
+} // namespace internal
+
/***************************************************************************
* Implementation of symmetric copies and permutations
***************************************************************************/
namespace internal {
-
-template<typename MatrixType, int UpLo>
-struct traits<SparseSymmetricPermutationProduct<MatrixType,UpLo> > : traits<MatrixType> {
-};
-template<int UpLo,typename MatrixType,int DestOrder>
+template<int Mode,typename MatrixType,int DestOrder>
void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm)
{
typedef typename MatrixType::Index Index;
@@ -337,11 +379,11 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
Index r = it.row();
Index c = it.col();
Index ip = perm ? perm[i] : i;
- if(UpLo==(Upper|Lower))
+ if(Mode==(Upper|Lower))
count[StorageOrderMatch ? jp : ip]++;
else if(r==c)
count[ip]++;
- else if(( UpLo==Lower && r>c) || ( UpLo==Upper && r<c))
+ else if(( Mode==Lower && r>c) || ( Mode==Upper && r<c))
{
count[ip]++;
count[jp]++;
@@ -370,7 +412,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
Index jp = perm ? perm[j] : j;
Index ip = perm ? perm[i] : i;
- if(UpLo==(Upper|Lower))
+ if(Mode==(Upper|Lower))
{
Index k = count[StorageOrderMatch ? jp : ip]++;
dest.innerIndexPtr()[k] = StorageOrderMatch ? ip : jp;
@@ -382,7 +424,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
dest.innerIndexPtr()[k] = ip;
dest.valuePtr()[k] = it.value();
}
- else if(( (UpLo&Lower)==Lower && r>c) || ( (UpLo&Upper)==Upper && r<c))
+ else if(( (Mode&Lower)==Lower && r>c) || ( (Mode&Upper)==Upper && r<c))
{
if(!StorageOrderMatch)
std::swap(ip,jp);
@@ -397,7 +439,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
}
}
-template<int _SrcUpLo,int _DstUpLo,typename MatrixType,int DstOrder>
+template<int _SrcMode,int _DstMode,typename MatrixType,int DstOrder>
void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DstOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm)
{
typedef typename MatrixType::Index Index;
@@ -407,8 +449,8 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
enum {
SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor,
StorageOrderMatch = int(SrcOrder) == int(DstOrder),
- DstUpLo = DstOrder==RowMajor ? (_DstUpLo==Upper ? Lower : Upper) : _DstUpLo,
- SrcUpLo = SrcOrder==RowMajor ? (_SrcUpLo==Upper ? Lower : Upper) : _SrcUpLo
+ DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode,
+ SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode
};
Index size = mat.rows();
@@ -421,11 +463,11 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
{
Index i = it.index();
- if((int(SrcUpLo)==int(Lower) && i<j) || (int(SrcUpLo)==int(Upper) && i>j))
+ if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
continue;
Index ip = perm ? perm[i] : i;
- count[int(DstUpLo)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
+ count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
}
}
dest.outerIndexPtr()[0] = 0;
@@ -441,17 +483,17 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
{
Index i = it.index();
- if((int(SrcUpLo)==int(Lower) && i<j) || (int(SrcUpLo)==int(Upper) && i>j))
+ if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
continue;
Index jp = perm ? perm[j] : j;
Index ip = perm? perm[i] : i;
- Index k = count[int(DstUpLo)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
- dest.innerIndexPtr()[k] = int(DstUpLo)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);
+ Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
+ dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);
if(!StorageOrderMatch) std::swap(ip,jp);
- if( ((int(DstUpLo)==int(Lower) && ip<jp) || (int(DstUpLo)==int(Upper) && ip>jp)))
+ if( ((int(DstMode)==int(Lower) && ip<jp) || (int(DstMode)==int(Upper) && ip>jp)))
dest.valuePtr()[k] = numext::conj(it.value());
else
dest.valuePtr()[k] = it.value();
@@ -461,9 +503,19 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
}
-template<typename MatrixType,int UpLo>
+// TODO implement twists in a more evaluator-friendly fashion
+
+namespace internal {
+
+template<typename MatrixType, int Mode>
+struct traits<SparseSymmetricPermutationProduct<MatrixType,Mode> > : traits<MatrixType> {
+};
+
+}
+
+template<typename MatrixType,int Mode>
class SparseSymmetricPermutationProduct
- : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,UpLo> >
+ : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,Mode> >
{
public:
typedef typename MatrixType::Scalar Scalar;
@@ -485,15 +537,15 @@ class SparseSymmetricPermutationProduct
template<typename DestScalar, int Options, typename DstIndex>
void evalTo(SparseMatrix<DestScalar,Options,DstIndex>& _dest) const
{
-// internal::permute_symm_to_fullsymm<UpLo>(m_matrix,_dest,m_perm.indices().data());
+// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
SparseMatrix<DestScalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
- internal::permute_symm_to_fullsymm<UpLo>(m_matrix,tmp,m_perm.indices().data());
+ internal::permute_symm_to_fullsymm<Mode>(m_matrix,tmp,m_perm.indices().data());
_dest = tmp;
}
- template<typename DestType,unsigned int DestUpLo> void evalTo(SparseSelfAdjointView<DestType,DestUpLo>& dest) const
+ template<typename DestType,unsigned int DestMode> void evalTo(SparseSelfAdjointView<DestType,DestMode>& dest) const
{
- internal::permute_symm_to_symm<UpLo,DestUpLo>(m_matrix,dest.matrix(),m_perm.indices().data());
+ internal::permute_symm_to_symm<Mode,DestMode>(m_matrix,dest.matrix(),m_perm.indices().data());
}
protected:
diff --git a/Eigen/src/SparseCore/SparseSolverBase.h b/Eigen/src/SparseCore/SparseSolverBase.h
new file mode 100644
index 000000000..df4e2f017
--- /dev/null
+++ b/Eigen/src/SparseCore/SparseSolverBase.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSESOLVERBASE_H
+#define EIGEN_SPARSESOLVERBASE_H
+
+namespace Eigen {
+
+namespace internal {
+
+ /** \internal
+ * Helper function to solve with a sparse right-hand side and result.
+ * The rhs is decomposed into small vertical panels, which are solved through dense temporaries.
+ */
+template<typename Decomposition, typename Rhs, typename Dest>
+void solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)
+{
+ EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+ typedef typename Dest::Scalar DestScalar;
+ // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
+ static const int NbColsAtOnce = 4;
+ int rhsCols = rhs.cols();
+ int size = rhs.rows();
+ // the temporary matrices do not need more columns than NbColsAtOnce:
+ int tmpCols = (std::min)(rhsCols, NbColsAtOnce);
+ Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);
+ Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);
+ for(int k=0; k<rhsCols; k+=NbColsAtOnce)
+ {
+ int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
+ tmp.leftCols(actualCols) = rhs.middleCols(k,actualCols);
+ tmpX.leftCols(actualCols) = dec.solve(tmp.leftCols(actualCols));
+ dest.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();
+ }
+}
+
+} // end namespace internal
+
+/** \class SparseSolverBase
+ * \ingroup SparseCore_Module
+ * \brief A base class for sparse solvers
+ *
+ * \tparam Derived the actual type of the solver.
+ *
+ */
+template<typename Derived>
+class SparseSolverBase : internal::noncopyable
+{
+ public:
+
+ /** Default constructor */
+ SparseSolverBase()
+ : m_isInitialized(false)
+ {}
+
+ ~SparseSolverBase()
+ {}
+
+ Derived& derived() { return *static_cast<Derived*>(this); }
+ const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+ /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+ *
+ * \sa compute()
+ */
+ template<typename Rhs>
+ inline const Solve<Derived, Rhs>
+ solve(const MatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "Solver is not initialized.");
+ eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+ return Solve<Derived, Rhs>(derived(), b.derived());
+ }
+
+ /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+ *
+ * \sa compute()
+ */
+ template<typename Rhs>
+ inline const Solve<Derived, Rhs>
+ solve(const SparseMatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "Solver is not initialized.");
+ eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+ return Solve<Derived, Rhs>(derived(), b.derived());
+ }
+
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ /** \internal default implementation of solving with a sparse rhs */
+ template<typename Rhs,typename Dest>
+ void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const
+ {
+ internal::solve_sparse_through_dense_panels(derived(), b.derived(), dest.derived());
+ }
+ #endif // EIGEN_PARSED_BY_DOXYGEN
+
+ protected:
+
+ mutable bool m_isInitialized;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPARSESOLVERBASE_H
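A minimal usage sketch of the solver interface defined above, assuming SimplicialLLT has already been ported to SparseSolverBase (a sketch under that assumption, not part of the patch):

#include <Eigen/Sparse>
using namespace Eigen;
void solver_example(const SparseMatrix<double>& A,
                    const VectorXd& b, const SparseMatrix<double>& B)
{
  SimplicialLLT<SparseMatrix<double> > llt(A);  // a decomposition deriving from SparseSolverBase
  VectorXd x = llt.solve(b);                    // dense rhs: returns a Solve<> expression
  SparseMatrix<double> X = llt.solve(B);        // sparse rhs: solved per dense panels
}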
diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
index fcc18f5c9..f291f8cef 100644
--- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
+++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -46,6 +46,9 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
res.resize(cols, rows);
else
res.resize(rows, cols);
+
+ typename evaluator<Lhs>::type lhsEval(lhs);
+ typename evaluator<Rhs>::type rhsEval(rhs);
res.reserve(estimated_nnz_prod);
double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols());
@@ -56,12 +59,12 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
// let's do a more accurate determination of the nnz ratio for the current column j of res
tempVector.init(ratioColRes);
tempVector.setZero();
- for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+ for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
tempVector.restart();
Scalar x = rhsIt.value();
- for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
+ for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
{
tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
}
@@ -140,8 +143,53 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
}
};
-// NOTE the 2 others cases (col row *) must never occur since they are caught
-// by ProductReturnType which transforms it to (col col *) by evaluating rhs.
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
+{
+ typedef typename ResultType::RealScalar RealScalar;
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+ {
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::Index> RowMajorMatrixLhs;
+ RowMajorMatrixLhs rowLhs(lhs);
+ sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>::run(rowLhs,rhs,res,tolerance);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
+{
+ typedef typename ResultType::RealScalar RealScalar;
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+ {
+ typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::Index> RowMajorMatrixRhs;
+ RowMajorMatrixRhs rowRhs(rhs);
+ sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>::run(lhs,rowRhs,res,tolerance);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
+{
+ typedef typename ResultType::RealScalar RealScalar;
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+ {
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixRhs;
+ ColMajorMatrixRhs colRhs(rhs);
+ internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);
+ }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
+{
+ typedef typename ResultType::RealScalar RealScalar;
+ static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+ {
+ typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixLhs;
+ ColMajorMatrixLhs colLhs(lhs);
+ internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);
+ }
+};
} // end namespace internal
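The new specializations above cover mismatched storage orders by converting one operand and recursing into the matching-order case. A user-level sketch of a mixed-major pruned product (illustration only):

#include <Eigen/SparseCore>
using namespace Eigen;
SparseMatrix<double,RowMajor> pruned_mixed(const SparseMatrix<double,RowMajor>& A,
                                           const SparseMatrix<double,ColMajor>& B)
{
  // Mixed-major operands are now handled by the selector; entries of the
  // product smaller than 1.0 * 1e-12 are pruned away.
  return (A * B).pruned(1.0, 1e-12);
}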
diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h
index 7c300ee8d..c3d2d1a16 100644
--- a/Eigen/src/SparseCore/SparseTranspose.h
+++ b/Eigen/src/SparseCore/SparseTranspose.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,52 +12,64 @@
namespace Eigen {
+// Implement nonZeros() for transpose. I'm not sure this is the best approach;
+// perhaps it should be implemented in Transpose<> itself.
template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>
: public SparseMatrixBase<Transpose<MatrixType> >
{
- typedef typename internal::remove_all<typename MatrixType::Nested>::type _MatrixTypeNested;
+ protected:
+ typedef SparseMatrixBase<Transpose<MatrixType> > Base;
public:
-
- EIGEN_SPARSE_PUBLIC_INTERFACE(Transpose<MatrixType> )
-
- class InnerIterator;
- class ReverseInnerIterator;
-
- inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); }
+ inline typename MatrixType::Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); }
};
-// NOTE: VC10 trigger an ICE if don't put typename TransposeImpl<MatrixType,Sparse>:: in front of Index,
-// a typedef typename TransposeImpl<MatrixType,Sparse>::Index Index;
-// does not fix the issue.
-// An alternative is to define the nested class in the parent class itself.
-template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::InnerIterator
- : public _MatrixTypeNested::InnerIterator
+namespace internal {
+
+template<typename ArgType>
+struct unary_evaluator<Transpose<ArgType>, IteratorBased>
+ : public evaluator_base<Transpose<ArgType> >
{
- typedef typename _MatrixTypeNested::InnerIterator Base;
- typedef typename TransposeImpl::Index Index;
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+ typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
public:
+ typedef Transpose<ArgType> XprType;
+ typedef typename XprType::Index Index;
- EIGEN_STRONG_INLINE InnerIterator(const TransposeImpl& trans, typename TransposeImpl<MatrixType,Sparse>::Index outer)
- : Base(trans.derived().nestedExpression(), outer)
- {}
- Index row() const { return Base::col(); }
- Index col() const { return Base::row(); }
-};
-
-template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::ReverseInnerIterator
- : public _MatrixTypeNested::ReverseInnerIterator
-{
- typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
- typedef typename TransposeImpl::Index Index;
- public:
+ class InnerIterator : public EvalIterator
+ {
+ public:
+ EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+ : EvalIterator(unaryOp.m_argImpl,outer)
+ {}
+
+ Index row() const { return EvalIterator::col(); }
+ Index col() const { return EvalIterator::row(); }
+ };
+
+ class ReverseInnerIterator : public EvalReverseIterator
+ {
+ public:
+ EIGEN_STRONG_INLINE ReverseInnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+ : EvalReverseIterator(unaryOp.m_argImpl,outer)
+ {}
+
+ Index row() const { return EvalReverseIterator::col(); }
+ Index col() const { return EvalReverseIterator::row(); }
+ };
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = XprType::Flags
+ };
+
+ explicit unary_evaluator(const XprType& op) :m_argImpl(op.nestedExpression()) {}
- EIGEN_STRONG_INLINE ReverseInnerIterator(const TransposeImpl& xpr, typename TransposeImpl<MatrixType,Sparse>::Index outer)
- : Base(xpr.derived().nestedExpression(), outer)
- {}
- Index row() const { return Base::col(); }
- Index col() const { return Base::row(); }
+ protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
};
+} // end namespace internal
+
} // end namespace Eigen
#endif // EIGEN_SPARSETRANSPOSE_H
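The evaluator above forwards iteration to the nested expression and swaps row() and col(). The same coordinate swap expressed at user level (sketch, not from the commit):

#include <Eigen/SparseCore>
#include <iostream>
using namespace Eigen;
void print_transpose(const SparseMatrix<double>& A)
{
  // Visiting A and reporting swapped coordinates is what the Transpose
  // evaluator's InnerIterator does internally.
  for (int j = 0; j < A.outerSize(); ++j)
    for (SparseMatrix<double>::InnerIterator it(A, j); it; ++it)
      std::cout << "A^T(" << it.col() << "," << it.row() << ") = " << it.value() << "\n";
}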
diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h
index 333127b78..b044d6778 100644
--- a/Eigen/src/SparseCore/SparseTriangularView.h
+++ b/Eigen/src/SparseCore/SparseTriangularView.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -13,17 +13,8 @@
namespace Eigen {
-namespace internal {
-
-template<typename MatrixType, int Mode>
-struct traits<SparseTriangularView<MatrixType,Mode> >
-: public traits<MatrixType>
-{};
-
-} // namespace internal
-
-template<typename MatrixType, int Mode> class SparseTriangularView
- : public SparseMatrixBase<SparseTriangularView<MatrixType,Mode> >
+template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<MatrixType,Mode,Sparse>
+ : public SparseMatrixBase<TriangularView<MatrixType,Mode> >
{
enum { SkipFirst = ((Mode&Lower) && !(MatrixType::Flags&RowMajorBit))
|| ((Mode&Upper) && (MatrixType::Flags&RowMajorBit)),
@@ -31,46 +22,53 @@ template<typename MatrixType, int Mode> class SparseTriangularView
SkipDiag = (Mode&ZeroDiag) ? 1 : 0,
HasUnitDiag = (Mode&UnitDiag) ? 1 : 0
};
+
+ typedef TriangularView<MatrixType,Mode> TriangularViewType;
+
+protected:
+ // dummy solve function to make TriangularView happy.
+ void solve() const;
public:
- EIGEN_SPARSE_PUBLIC_INTERFACE(SparseTriangularView)
-
+ EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType)
+
class InnerIterator;
class ReverseInnerIterator;
- inline Index rows() const { return m_matrix.rows(); }
- inline Index cols() const { return m_matrix.cols(); }
-
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename internal::remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename internal::remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
- inline SparseTriangularView(const MatrixType& matrix) : m_matrix(matrix) {}
-
- /** \internal */
- inline const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
-
- template<typename OtherDerived>
- typename internal::plain_matrix_type_column_major<OtherDerived>::type
- solve(const MatrixBase<OtherDerived>& other) const;
+ template<typename RhsType, typename DstType>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
+ if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))
+ dst = rhs;
+ this->solveInPlace(dst);
+ }
template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;
- protected:
- MatrixTypeNested m_matrix;
+ inline Index nonZeros() const {
+ // FIXME HACK: the number of nonZeros is required for the product logic;
+ // this returns only an upper bound (but should be OK for most purposes)
+ return derived().nestedExpression().nonZeros();
+ }
+
+
};
-template<typename MatrixType, int Mode>
-class SparseTriangularView<MatrixType,Mode>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator
+template<typename MatrixType, unsigned int Mode>
+class TriangularViewImpl<MatrixType,Mode,Sparse>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator
{
typedef typename MatrixTypeNestedCleaned::InnerIterator Base;
- typedef typename SparseTriangularView::Index Index;
+ typedef typename TriangularViewType::Index Index;
public:
- EIGEN_STRONG_INLINE InnerIterator(const SparseTriangularView& view, Index outer)
- : Base(view.nestedExpression(), outer), m_returnOne(false)
+ EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer)
+ : Base(view.derived().nestedExpression(), outer), m_returnOne(false)
{
if(SkipFirst)
{
@@ -132,15 +130,15 @@ class SparseTriangularView<MatrixType,Mode>::InnerIterator : public MatrixTypeNe
bool m_returnOne;
};
-template<typename MatrixType, int Mode>
-class SparseTriangularView<MatrixType,Mode>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator
+template<typename MatrixType, unsigned int Mode>
+class TriangularViewImpl<MatrixType,Mode,Sparse>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator
{
typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base;
- typedef typename SparseTriangularView::Index Index;
+ typedef typename TriangularViewImpl::Index Index;
public:
- EIGEN_STRONG_INLINE ReverseInnerIterator(const SparseTriangularView& view, Index outer)
- : Base(view.nestedExpression(), outer)
+ EIGEN_STRONG_INLINE ReverseInnerIterator(const TriangularViewType& view, Index outer)
+ : Base(view.derived().nestedExpression(), outer)
{
eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal");
if(SkipLast) {
@@ -166,12 +164,119 @@ class SparseTriangularView<MatrixType,Mode>::ReverseInnerIterator : public Matri
}
};
+namespace internal {
+
+template<typename ArgType, unsigned int Mode>
+struct unary_evaluator<TriangularView<ArgType,Mode>, IteratorBased>
+ : evaluator_base<TriangularView<ArgType,Mode> >
+{
+ typedef TriangularView<ArgType,Mode> XprType;
+
+protected:
+
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::Index Index;
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+
+ enum { SkipFirst = ((Mode&Lower) && !(ArgType::Flags&RowMajorBit))
+ || ((Mode&Upper) && (ArgType::Flags&RowMajorBit)),
+ SkipLast = !SkipFirst,
+ SkipDiag = (Mode&ZeroDiag) ? 1 : 0,
+ HasUnitDiag = (Mode&UnitDiag) ? 1 : 0
+ };
+
+public:
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = XprType::Flags
+ };
+
+ explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {}
+
+ class InnerIterator : public EvalIterator
+ {
+ typedef EvalIterator Base;
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer)
+ : Base(xprEval.m_argImpl,outer), m_returnOne(false)
+ {
+ if(SkipFirst)
+ {
+ while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()<outer))
+ Base::operator++();
+ if(HasUnitDiag)
+ m_returnOne = true;
+ }
+ else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer()))
+ {
+ if((!SkipFirst) && Base::operator bool())
+ Base::operator++();
+ m_returnOne = true; // FIXME check innerSize()>outer();
+ }
+ }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
+ {
+ if(HasUnitDiag && m_returnOne)
+ m_returnOne = false;
+ else
+ {
+ Base::operator++();
+ if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer()))
+ {
+ if((!SkipFirst) && Base::operator bool())
+ Base::operator++();
+ m_returnOne = true; // FIXME check innerSize()>outer();
+ }
+ }
+ return *this;
+ }
+
+ EIGEN_STRONG_INLINE operator bool() const
+ {
+ if(HasUnitDiag && m_returnOne)
+ return true;
+ if(SkipFirst) return Base::operator bool();
+ else
+ {
+ if (SkipDiag) return (Base::operator bool() && this->index() < this->outer());
+ else return (Base::operator bool() && this->index() <= this->outer());
+ }
+ }
+
+// inline Index row() const { return (ArgType::Flags&RowMajorBit ? Base::outer() : this->index()); }
+// inline Index col() const { return (ArgType::Flags&RowMajorBit ? this->index() : Base::outer()); }
+ inline Index index() const
+ {
+ if(HasUnitDiag && m_returnOne) return Base::outer();
+ else return Base::index();
+ }
+ inline Scalar value() const
+ {
+ if(HasUnitDiag && m_returnOne) return Scalar(1);
+ else return Base::value();
+ }
+
+ protected:
+ bool m_returnOne;
+ private:
+ Scalar& valueRef();
+ };
+
+protected:
+ typename evaluator<ArgType>::type m_argImpl;
+};
+
+} // end namespace internal
+
template<typename Derived>
template<int Mode>
-inline const SparseTriangularView<Derived, Mode>
+inline const TriangularView<const Derived, Mode>
SparseMatrixBase<Derived>::triangularView() const
{
- return derived();
+ return TriangularView<const Derived, Mode>(derived());
}
} // end namespace Eigen
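A short usage sketch of the sparse triangular view ported above (illustration only):

#include <Eigen/Dense>
#include <Eigen/SparseCore>
using namespace Eigen;
void triangular_example(const SparseMatrix<double>& A, VectorXd& x)
{
  // product with the stored strict lower part plus an implicit unit diagonal
  VectorXd y = A.triangularView<UnitLower>() * x;
  // in-place backward substitution with the upper part
  A.triangularView<Upper>().solveInPlace(x);
}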
diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h
index 02c19d18f..8de227b88 100644
--- a/Eigen/src/SparseCore/SparseUtil.h
+++ b/Eigen/src/SparseCore/SparseUtil.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -52,13 +52,12 @@ EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=)
typedef typename Eigen::internal::traits<Derived >::Index Index; \
enum { RowsAtCompileTime = Eigen::internal::traits<Derived >::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::internal::traits<Derived >::ColsAtCompileTime, \
- Flags = Eigen::internal::traits<Derived >::Flags, \
- CoeffReadCost = Eigen::internal::traits<Derived >::CoeffReadCost, \
+ Flags = Eigen::internal::traits<Derived>::Flags, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
using Base::derived; \
using Base::const_cast_derived;
-
+
#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \
_EIGEN_SPARSE_PUBLIC_INTERFACE(Derived, Eigen::SparseMatrixBase<Derived >)
@@ -73,7 +72,6 @@ template<typename _Scalar, int _Flags = 0, typename _Index = int> class Dynamic
template<typename _Scalar, int _Flags = 0, typename _Index = int> class SparseVector;
template<typename _Scalar, int _Flags = 0, typename _Index = int> class MappedSparseMatrix;
-template<typename MatrixType, int Mode> class SparseTriangularView;
template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView;
template<typename Lhs, typename Rhs> class SparseDiagonalProduct;
template<typename MatrixType> class SparseView;
@@ -131,11 +129,29 @@ template<typename T> struct plain_matrix_type<T,Sparse>
{
typedef typename traits<T>::Scalar _Scalar;
typedef typename traits<T>::Index _Index;
- enum { _Options = ((traits<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };
+ enum { _Options = ((evaluator<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };
public:
typedef SparseMatrix<_Scalar, _Options, _Index> type;
};
+template<typename Decomposition, typename RhsType>
+struct solve_traits<Decomposition,RhsType,Sparse>
+{
+ typedef typename sparse_eval<RhsType, RhsType::RowsAtCompileTime, RhsType::ColsAtCompileTime>::type PlainObject;
+};
+
+template<typename Derived>
+struct generic_xpr_base<Derived, MatrixXpr, Sparse>
+{
+ typedef SparseMatrixBase<Derived> type;
+};
+
+struct SparseTriangularShape { static std::string debugName() { return "SparseTriangularShape"; } };
+struct SparseSelfAdjointShape { static std::string debugName() { return "SparseSelfAdjointShape"; } };
+
+template<> struct glue_shapes<SparseShape,SelfAdjointShape> { typedef SparseSelfAdjointShape type; };
+template<> struct glue_shapes<SparseShape,TriangularShape > { typedef SparseTriangularShape type; };
+
} // end namespace internal
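The Shape tags above steer products and assignments to the matching evaluator specialization via glue_shapes. A stripped-down sketch of the same tag-gluing pattern, using hypothetical names rather than Eigen's (illustration only):

#include <string>
struct SpShape    { static std::string debugName() { return "Sp"; } };
struct TriShape   { static std::string debugName() { return "Tri"; } };
struct SpTriShape { static std::string debugName() { return "SpTri"; } };
template<typename S1, typename S2> struct glue;   // primary template left undefined
template<> struct glue<SpShape, TriShape> { typedef SpTriShape type; };
// glue<SpShape,TriShape>::type now names the combined shape, which a dispatcher
// can use to pick a specialized evaluator, mirroring glue_shapes above.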
/** \ingroup SparseCore_Module
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index 0b1b389ce..8b696a476 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -221,7 +221,7 @@ class SparseVector
inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); }
- inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); }
+ explicit inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); }
inline SparseVector(Index rows, Index cols) : m_size(0) { check_template_parameters(); resize(rows,cols); }
@@ -360,14 +360,14 @@ template<typename Scalar, int _Options, typename _Index>
class SparseVector<Scalar,_Options,_Index>::InnerIterator
{
public:
- InnerIterator(const SparseVector& vec, Index outer=0)
+ explicit InnerIterator(const SparseVector& vec, Index outer=0)
: m_data(vec.m_data), m_id(0), m_end(static_cast<Index>(m_data.size()))
{
EIGEN_UNUSED_VARIABLE(outer);
eigen_assert(outer==0);
}
- InnerIterator(const internal::CompressedStorage<Scalar,Index>& data)
+ explicit InnerIterator(const internal::CompressedStorage<Scalar,Index>& data)
: m_data(data), m_id(0), m_end(static_cast<Index>(m_data.size()))
{}
@@ -386,20 +386,25 @@ class SparseVector<Scalar,_Options,_Index>::InnerIterator
const internal::CompressedStorage<Scalar,Index>& m_data;
Index m_id;
const Index m_end;
+ private:
+ // If you get here, then you're not using the right InnerIterator type, e.g.:
+ // SparseMatrix<double,RowMajor> A;
+ // SparseMatrix<double>::InnerIterator it(A,0);
+ template<typename T> InnerIterator(const SparseMatrixBase<T>&,Index outer=0);
};
template<typename Scalar, int _Options, typename _Index>
class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator
{
public:
- ReverseInnerIterator(const SparseVector& vec, Index outer=0)
+ explicit ReverseInnerIterator(const SparseVector& vec, Index outer=0)
: m_data(vec.m_data), m_id(static_cast<Index>(m_data.size())), m_start(0)
{
EIGEN_UNUSED_VARIABLE(outer);
eigen_assert(outer==0);
}
- ReverseInnerIterator(const internal::CompressedStorage<Scalar,Index>& data)
+ explicit ReverseInnerIterator(const internal::CompressedStorage<Scalar,Index>& data)
: m_data(data), m_id(static_cast<Index>(m_data.size())), m_start(0)
{}
@@ -422,11 +427,34 @@ class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator
namespace internal {
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<SparseVector<_Scalar,_Options,_Index> >
+ : evaluator_base<SparseVector<_Scalar,_Options,_Index> >
+{
+ typedef SparseVector<_Scalar,_Options,_Index> SparseVectorType;
+ typedef typename SparseVectorType::InnerIterator InnerIterator;
+ typedef typename SparseVectorType::ReverseInnerIterator ReverseInnerIterator;
+
+ enum {
+ CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+ Flags = SparseVectorType::Flags
+ };
+
+ explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {}
+
+ operator SparseVectorType&() { return m_matrix.const_cast_derived(); }
+ operator const SparseVectorType&() const { return m_matrix; }
+
+ const SparseVectorType &m_matrix;
+};
+
template< typename Dest, typename Src>
struct sparse_vector_assign_selector<Dest,Src,SVA_Inner> {
static void run(Dest& dst, const Src& src) {
eigen_internal_assert(src.innerSize()==src.size());
- for(typename Src::InnerIterator it(src, 0); it; ++it)
+ typedef typename internal::evaluator<Src>::type SrcEvaluatorType;
+ SrcEvaluatorType srcEval(src);
+ for(typename SrcEvaluatorType::InnerIterator it(srcEval, 0); it; ++it)
dst.insert(it.index()) = it.value();
}
};
@@ -435,9 +463,11 @@ template< typename Dest, typename Src>
struct sparse_vector_assign_selector<Dest,Src,SVA_Outer> {
static void run(Dest& dst, const Src& src) {
eigen_internal_assert(src.outerSize()==src.size());
+ typedef typename internal::evaluator<Src>::type SrcEvaluatorType;
+ SrcEvaluatorType srcEval(src);
for(typename Dest::Index i=0; i<src.size(); ++i)
{
- typename Src::InnerIterator it(src, i);
+ typename SrcEvaluatorType::InnerIterator it(srcEval, i);
if(it)
dst.insert(i) = it.value();
}
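The assign selectors above walk the source through its evaluator; the public-API equivalent using SparseVector's InnerIterator (sketch only):

#include <Eigen/SparseCore>
using namespace Eigen;
void copy_vector(const SparseVector<double>& src, SparseVector<double>& dst)
{
  dst.resize(src.size());
  // same insertion pattern as the SVA_Inner selector above
  for (SparseVector<double>::InnerIterator it(src); it; ++it)
    dst.insert(it.index()) = it.value();
}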
diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h
index fd8450463..40a3019fa 100644
--- a/Eigen/src/SparseCore/SparseView.h
+++ b/Eigen/src/SparseCore/SparseView.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010 Daniel Lowengrub <lowdanie@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -34,64 +34,186 @@ class SparseView : public SparseMatrixBase<SparseView<MatrixType> >
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
public:
EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
- SparseView(const MatrixType& mat, const Scalar& m_reference = Scalar(0),
- typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) :
+ explicit SparseView(const MatrixType& mat, const Scalar& m_reference = Scalar(0),
+ RealScalar m_epsilon = NumTraits<Scalar>::dummy_precision()) :
m_matrix(mat), m_reference(m_reference), m_epsilon(m_epsilon) {}
- class InnerIterator;
-
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
inline Index innerSize() const { return m_matrix.innerSize(); }
inline Index outerSize() const { return m_matrix.outerSize(); }
-
+
+ /** \returns the nested expression */
+ const typename internal::remove_all<MatrixTypeNested>::type&
+ nestedExpression() const { return m_matrix; }
+
+ Scalar reference() const { return m_reference; }
+ RealScalar epsilon() const { return m_epsilon; }
+
protected:
MatrixTypeNested m_matrix;
Scalar m_reference;
- typename NumTraits<Scalar>::Real m_epsilon;
+ RealScalar m_epsilon;
};
-template<typename MatrixType>
-class SparseView<MatrixType>::InnerIterator : public _MatrixTypeNested::InnerIterator
-{
- typedef typename SparseView::Index Index;
-public:
- typedef typename _MatrixTypeNested::InnerIterator IterBase;
- InnerIterator(const SparseView& view, Index outer) :
- IterBase(view.m_matrix, outer), m_view(view)
- {
- incrementToNonZero();
- }
-
- EIGEN_STRONG_INLINE InnerIterator& operator++()
- {
- IterBase::operator++();
- incrementToNonZero();
- return *this;
- }
-
- using IterBase::value;
+namespace internal {
-protected:
- const SparseView& m_view;
+// TODO find a way to unify the two following variants
+// This is tricky because implementing an inner iterator on top of an IndexBased evaluator is
+// not easy: the evaluators do not expose the sizes of the underlying expression.
+
+template<typename ArgType>
+struct unary_evaluator<SparseView<ArgType>, IteratorBased>
+ : public evaluator_base<SparseView<ArgType> >
+{
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+ public:
+ typedef SparseView<ArgType> XprType;
+
+ class InnerIterator : public EvalIterator
+ {
+ typedef typename XprType::Scalar Scalar;
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, typename XprType::Index outer)
+ : EvalIterator(sve.m_argImpl,outer), m_view(sve.m_view)
+ {
+ incrementToNonZero();
+ }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
+ {
+ EvalIterator::operator++();
+ incrementToNonZero();
+ return *this;
+ }
+
+ using EvalIterator::value;
+
+ protected:
+ const XprType &m_view;
+
+ private:
+ void incrementToNonZero()
+ {
+ while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.reference(), m_view.epsilon()))
+ {
+ EvalIterator::operator++();
+ }
+ }
+ };
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = XprType::Flags
+ };
+
+ explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {}
+
+ protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const XprType &m_view;
+};
-private:
- void incrementToNonZero()
- {
- while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.m_reference, m_view.m_epsilon))
+template<typename ArgType>
+struct unary_evaluator<SparseView<ArgType>, IndexBased>
+ : public evaluator_base<SparseView<ArgType> >
+{
+ public:
+ typedef SparseView<ArgType> XprType;
+ protected:
+ enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ public:
+
+ class InnerIterator
{
- IterBase::operator++();
- }
- }
+ public:
+
+ EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, typename XprType::Index outer)
+ : m_sve(sve), m_inner(0), m_outer(outer), m_end(sve.m_view.innerSize())
+ {
+ incrementToNonZero();
+ }
+
+ EIGEN_STRONG_INLINE InnerIterator& operator++()
+ {
+ m_inner++;
+ incrementToNonZero();
+ return *this;
+ }
+
+ EIGEN_STRONG_INLINE Scalar value() const
+ {
+ return (IsRowMajor) ? m_sve.m_argImpl.coeff(m_outer, m_inner)
+ : m_sve.m_argImpl.coeff(m_inner, m_outer);
+ }
+
+ EIGEN_STRONG_INLINE Index index() const { return m_inner; }
+ inline Index row() const { return IsRowMajor ? m_outer : index(); }
+ inline Index col() const { return IsRowMajor ? index() : m_outer; }
+
+ EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
+
+ protected:
+ const unary_evaluator &m_sve;
+ Index m_inner;
+ const Index m_outer;
+ const Index m_end;
+
+ private:
+ void incrementToNonZero()
+ {
+ while((bool(*this)) && internal::isMuchSmallerThan(value(), m_sve.m_view.reference(), m_sve.m_view.epsilon()))
+ {
+ m_inner++;
+ }
+ }
+ };
+
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = XprType::Flags
+ };
+
+ explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {}
+
+ protected:
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const XprType &m_view;
};
+} // end namespace internal
+
+template<typename Derived>
+const SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& reference,
+ const typename NumTraits<Scalar>::Real& epsilon) const
+{
+ return SparseView<Derived>(derived(), reference, epsilon);
+}
+
+/** \returns an expression of \c *this in which values smaller than
+ * \a reference * \a epsilon have been removed.
+ *
+ * This method is typically used in conjunction with the product of two sparse matrices
+ * to automatically prune the smallest values as follows:
+ * \code
+ * C = (A*B).pruned(); // suppress numerical zeros (exact)
+ * C = (A*B).pruned(ref);
+ * C = (A*B).pruned(ref,epsilon);
+ * \endcode
+ * where \c ref is a meaningful nonzero reference value.
+ */
template<typename Derived>
-const SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& m_reference,
- const typename NumTraits<Scalar>::Real& m_epsilon) const
+const SparseView<Derived>
+SparseMatrixBase<Derived>::pruned(const Scalar& reference,
+ const RealScalar& epsilon) const
{
- return SparseView<Derived>(derived(), m_reference, m_epsilon);
+ return SparseView<Derived>(derived(), reference, epsilon);
}
} // end namespace Eigen
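A usage sketch of the two entry points defined above (illustration only):

#include <Eigen/Dense>
#include <Eigen/SparseCore>
using namespace Eigen;
void prune_example(const MatrixXd& D,
                   const SparseMatrix<double>& A, const SparseMatrix<double>& B)
{
  SparseMatrix<double> S = D.sparseView();    // keep only numerically nonzero entries
  SparseMatrix<double> C = (A * B).pruned();  // suppress exact zeros of the product
}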
diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h
index dd55522a7..98062e9c6 100644
--- a/Eigen/src/SparseCore/TriangularSolver.h
+++ b/Eigen/src/SparseCore/TriangularSolver.h
@@ -29,8 +29,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::type LhsEval;
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
static void run(const Lhs& lhs, Rhs& other)
{
+ LhsEval lhsEval(lhs);
for(Index col=0 ; col<other.cols() ; ++col)
{
for(Index i=0; i<lhs.rows(); ++i)
@@ -38,7 +41,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor>
Scalar tmp = other.coeff(i,col);
Scalar lastVal(0);
Index lastIndex = 0;
- for(typename Lhs::InnerIterator it(lhs, i); it; ++it)
+ for(LhsIterator it(lhsEval, i); it; ++it)
{
lastVal = it.value();
lastIndex = it.index();
@@ -64,15 +67,18 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::type LhsEval;
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
static void run(const Lhs& lhs, Rhs& other)
{
+ LhsEval lhsEval(lhs);
for(Index col=0 ; col<other.cols() ; ++col)
{
for(Index i=lhs.rows()-1 ; i>=0 ; --i)
{
Scalar tmp = other.coeff(i,col);
Scalar l_ii = 0;
- typename Lhs::InnerIterator it(lhs, i);
+ LhsIterator it(lhsEval, i);
while(it && it.index()<i)
++it;
if(!(Mode & UnitDiag))
@@ -88,10 +94,8 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>
tmp -= it.value() * other.coeff(it.index(),col);
}
- if (Mode & UnitDiag)
- other.coeffRef(i,col) = tmp;
- else
- other.coeffRef(i,col) = tmp/l_ii;
+ if (Mode & UnitDiag) other.coeffRef(i,col) = tmp;
+ else other.coeffRef(i,col) = tmp/l_ii;
}
}
}
@@ -103,8 +107,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::type LhsEval;
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
static void run(const Lhs& lhs, Rhs& other)
{
+ LhsEval lhsEval(lhs);
for(Index col=0 ; col<other.cols() ; ++col)
{
for(Index i=0; i<lhs.cols(); ++i)
@@ -112,7 +119,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor>
Scalar& tmp = other.coeffRef(i,col);
if (tmp!=Scalar(0)) // optimization when other is actually sparse
{
- typename Lhs::InnerIterator it(lhs, i);
+ LhsIterator it(lhsEval, i);
while(it && it.index()<i)
++it;
if(!(Mode & UnitDiag))
@@ -136,8 +143,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>
{
typedef typename Rhs::Scalar Scalar;
typedef typename Lhs::Index Index;
+ typedef typename evaluator<Lhs>::type LhsEval;
+ typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
static void run(const Lhs& lhs, Rhs& other)
{
+ LhsEval lhsEval(lhs);
for(Index col=0 ; col<other.cols() ; ++col)
{
for(Index i=lhs.cols()-1; i>=0; --i)
@@ -148,13 +158,13 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>
if(!(Mode & UnitDiag))
{
// TODO replace this with a binary search; make sure it is safe for partially sorted elements
- typename Lhs::ReverseInnerIterator it(lhs, i);
+ LhsIterator it(lhsEval, i);
while(it && it.index()!=i)
- --it;
+ ++it;
eigen_assert(it && it.index()==i);
other.coeffRef(i,col) /= it.value();
}
- typename Lhs::InnerIterator it(lhs, i);
+ LhsIterator it(lhsEval, i);
for(; it && it.index()<i; ++it)
other.coeffRef(it.index(), col) -= tmp * it.value();
}
@@ -165,11 +175,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>
} // end namespace internal
-template<typename ExpressionType,int Mode>
+template<typename ExpressionType,unsigned int Mode>
template<typename OtherDerived>
-void SparseTriangularView<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDerived>& other) const
+void TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(MatrixBase<OtherDerived>& other) const
{
- eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows());
+ eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows());
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit };
@@ -178,22 +188,12 @@ void SparseTriangularView<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDer
typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
OtherCopy otherCopy(other.derived());
- internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(m_matrix, otherCopy);
+ internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(derived().nestedExpression(), otherCopy);
if (copy)
other = otherCopy;
}
-template<typename ExpressionType,int Mode>
-template<typename OtherDerived>
-typename internal::plain_matrix_type_column_major<OtherDerived>::type
-SparseTriangularView<ExpressionType,Mode>::solve(const MatrixBase<OtherDerived>& other) const
-{
- typename internal::plain_matrix_type_column_major<OtherDerived>::type res(other);
- solveInPlace(res);
- return res;
-}
-
// pure sparse path
namespace internal {
@@ -290,11 +290,11 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor>
} // end namespace internal
-template<typename ExpressionType,int Mode>
+template<typename ExpressionType,unsigned int Mode>
template<typename OtherDerived>
-void SparseTriangularView<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const
+void TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const
{
- eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows());
+ eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows());
eigen_assert( (!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
// enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit };
@@ -303,7 +303,7 @@ void SparseTriangularView<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<Ot
// typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
// OtherCopy otherCopy(other.derived());
- internal::sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(m_matrix, other.derived());
+ internal::sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(derived().nestedExpression(), other.derived());
// if (copy)
// other = otherCopy;
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 7a9aeec2d..79b78da99 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -70,9 +70,14 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu
* \sa \ref OrderingMethods_Module
*/
template <typename _MatrixType, typename _OrderingType>
-class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
+class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
{
+ protected:
+ typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
+ using APIBase::m_isInitialized;
public:
+ using APIBase::_solve_impl;
+
typedef _MatrixType MatrixType;
typedef _OrderingType OrderingType;
typedef typename MatrixType::Scalar Scalar;
@@ -86,11 +91,11 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
typedef internal::SparseLUImpl<Scalar, Index> Base;
public:
- SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+ SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
{
initperfvalues();
}
- SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+ explicit SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
{
initperfvalues();
compute(matrix);
@@ -168,6 +173,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
m_diagpivotthresh = thresh;
}
+#ifdef EIGEN_PARSED_BY_DOXYGEN
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \warning the destination matrix X in X = this->solve(B) must be column-major.
@@ -175,26 +181,8 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
* \sa compute()
*/
template<typename Rhs>
- inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const
- {
- eigen_assert(m_factorizationIsOk && "SparseLU is not initialized.");
- eigen_assert(rows()==B.rows()
- && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
- return internal::solve_retval<SparseLU, Rhs>(*this, B.derived());
- }
-
- /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
- {
- eigen_assert(m_factorizationIsOk && "SparseLU is not initialized.");
- eigen_assert(rows()==B.rows()
- && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
- return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived());
- }
+ inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
+#endif // EIGEN_PARSED_BY_DOXYGEN
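A minimal usage sketch of this Solve<>-based API (assembly of A and b is assumed; COLAMDOrdering is the customary ordering choice):

#include <Eigen/Sparse>

// Solves A*x = b with SparseLU and returns x. Assigning from solve()
// evaluates the Solve<> expression through _solve_impl().
Eigen::VectorXd solveWithSparseLU(const Eigen::SparseMatrix<double>& A,
                                  const Eigen::VectorXd& b)
{
  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > lu(A);
  return lu.solve(b);
}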
/** \brief Reports whether previous computation was successful.
*
@@ -219,7 +207,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
}
template<typename Rhs, typename Dest>
- bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
+ bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
{
Dest& X(X_base.derived());
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
@@ -261,14 +249,13 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
// Initialize with the determinant of the row matrix
Scalar det = Scalar(1.);
- //Note that the diagonal blocks of U are stored in supernodes,
+ // Note that the diagonal blocks of U are stored in supernodes,
// which are available in the L part :)
for (Index j = 0; j < this->cols(); ++j)
{
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
{
- if(it.row() < j) continue;
- if(it.row() == j)
+ if(it.index() == j)
{
det *= abs(it.value());
break;
@@ -322,7 +309,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
// Functions
void initperfvalues()
{
- m_perfv.panel_size = 1;
+ m_perfv.panel_size = 16;
m_perfv.relax = 1;
m_perfv.maxsuper = 128;
m_perfv.rowblk = 16;
@@ -332,7 +319,6 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
// Variables
mutable ComputationInfo m_info;
- bool m_isInitialized;
bool m_factorizationIsOk;
bool m_analysisIsOk;
std::string m_lastError;
@@ -377,30 +363,32 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
//TODO As in SuperLU, it is possible to compute row and column scaling vectors to equilibrate the matrix mat.
+ // First, copy the whole input matrix.
+ m_mat = mat;
+
+ // Compute fill-in ordering
OrderingType ord;
- ord(mat,m_perm_c);
+ ord(m_mat,m_perm_c);
// Apply the permutation to the column of the input matrix
- //First copy the whole input matrix.
- m_mat = mat;
- if (m_perm_c.size()) {
+ if (m_perm_c.size())
+ {
m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME: This vector is filled but not subsequently used.
- //Then, permute only the column pointers
- const Index * outerIndexPtr;
- if (mat.isCompressed()) outerIndexPtr = mat.outerIndexPtr();
- else
- {
- Index *outerIndexPtr_t = new Index[mat.cols()+1];
- for(Index i = 0; i <= mat.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
- outerIndexPtr = outerIndexPtr_t;
- }
+ // Then, permute only the column pointers
+ ei_declare_aligned_stack_constructed_variable(Index,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<Index*>(mat.outerIndexPtr()):0);
+
+ // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
+ if(!mat.isCompressed())
+ IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
+
+ // Apply the permutation and compute the nnz per column.
for (Index i = 0; i < mat.cols(); i++)
{
m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
}
- if(!mat.isCompressed()) delete[] outerIndexPtr;
}
+
// Compute the column elimination tree of the permuted matrix
IndexVector firstRowElt;
internal::coletree(m_mat, m_etree,firstRowElt);
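The point of the symbolic/numeric split implemented here is that the pattern analysis can be reused; a sketch, assuming A1 and A2 share the same sparsity pattern:

#include <Eigen/Sparse>

// Factorizes two matrices with an identical sparsity pattern, reusing the
// fill-in ordering and column elimination tree from analyzePattern().
void refactorize(const Eigen::SparseMatrix<double>& A1,
                 const Eigen::SparseMatrix<double>& A2)
{
  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > lu;
  lu.analyzePattern(A1); // symbolic step: ordering + elimination tree
  lu.factorize(A1);      // numeric step
  lu.factorize(A2);      // only the numeric step is redone
}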
@@ -463,6 +451,8 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
typedef typename IndexVector::Scalar Index;
+ m_isInitialized = true;
+
// Apply the column permutation computed in analyzepattern()
// m_mat = matrix * m_perm_c.inverse();
@@ -661,7 +651,7 @@ struct SparseLUMatrixLReturnType : internal::no_assignment_operator
{
typedef typename MappedSupernodalType::Index Index;
typedef typename MappedSupernodalType::Scalar Scalar;
- SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
+ explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
{ }
Index rows() { return m_mapL.rows(); }
Index cols() { return m_mapL.cols(); }
@@ -678,7 +668,7 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator
{
typedef typename MatrixLType::Index Index;
typedef typename MatrixLType::Scalar Scalar;
- SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
+ explicit SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
: m_mapL(mapL),m_mapU(mapU)
{ }
Index rows() { return m_mapL.rows(); }
@@ -686,8 +676,11 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator
template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const
{
- Index nrhs = X.cols();
- Index n = X.rows();
+ /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */
+ eigen_assert(X.rows() <= NumTraits<Index>::highest());
+ eigen_assert(X.cols() <= NumTraits<Index>::highest());
+ Index nrhs = Index(X.cols());
+ Index n = Index(X.rows());
// Backward solve with U
for (Index k = m_mapL.nsuper(); k >= 0; k--)
{
@@ -728,35 +721,6 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator
const MatrixUType& m_mapU;
};
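The same guard-then-narrow idiom as a standalone sketch, using assert and int in place of eigen_assert and the decomposition's Index type:

#include <cassert>
#include <Eigen/Dense>

// Checks that the dimensions fit into the narrower index type before
// converting, mirroring the asserts in solveInPlace() above.
template<typename Dest>
void narrowDims(const Eigen::MatrixBase<Dest>& X, int& n, int& nrhs)
{
  assert(X.rows() <= Eigen::NumTraits<int>::highest());
  assert(X.cols() <= Eigen::NumTraits<int>::highest());
  n    = int(X.rows());
  nrhs = int(X.cols());
}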
-namespace internal {
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
- : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
-{
- typedef SparseLU<_MatrixType,Derived> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
- : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
-{
- typedef SparseLU<_MatrixType,Derived> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-} // end namespace internal
-
} // End namespace Eigen
#endif
diff --git a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
index ad6f2183f..e8ee35a94 100644
--- a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
+++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -189,8 +189,8 @@ class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator
m_idval(mat.colIndexPtr()[outer]),
m_startidval(m_idval),
m_endidval(mat.colIndexPtr()[outer+1]),
- m_idrow(mat.rowIndexPtr()[outer]),
- m_endidrow(mat.rowIndexPtr()[outer+1])
+ m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]),
+ m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1])
{}
inline InnerIterator& operator++()
{
@@ -233,8 +233,11 @@ template<typename Scalar, typename Index>
template<typename Dest>
void MappedSuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
{
- Index n = X.rows();
- Index nrhs = X.cols();
+ /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */
+ eigen_assert(X.rows() <= NumTraits<Index>::highest());
+ eigen_assert(X.cols() <= NumTraits<Index>::highest());
+ Index n = Index(X.rows());
+ Index nrhs = Index(X.cols());
const Scalar * Lval = valuePtr(); // Nonzero values
Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector
work.setZero();
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index 0d0283b13..cad149ded 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -31,13 +31,13 @@ namespace internal {
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
- static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
};
template <int SegSizeAtCompileTime>
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
-EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
{
typedef typename ScalarVector::Scalar Scalar;
@@ -45,7 +45,7 @@ EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsi
// The result of triangular solve is in tempv[*];
// The result of the matrix-vector update is in dense[*]
Index isub = lptr + no_zeros;
- int i;
+ Index i;
Index irow;
for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
{
@@ -92,13 +92,13 @@ EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsi
template <> struct LU_kernel_bmod<1>
{
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
- static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
};
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
-EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
{
typedef typename ScalarVector::Scalar Scalar;
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 002b4824b..133211488 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -62,9 +62,13 @@ namespace internal {
*
*/
template<typename _MatrixType, typename _OrderingType>
-class SparseQR
+class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
{
+ protected:
+ typedef SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > Base;
+ using Base::m_isInitialized;
public:
+ using Base::_solve_impl;
typedef _MatrixType MatrixType;
typedef _OrderingType OrderingType;
typedef typename MatrixType::Scalar Scalar;
@@ -75,7 +79,7 @@ class SparseQR
typedef Matrix<Scalar, Dynamic, 1> ScalarVector;
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
public:
- SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
+ SparseQR () : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
{ }
/** Construct a QR factorization of the matrix \a mat.
@@ -84,7 +88,7 @@ class SparseQR
*
* \sa compute()
*/
- SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
+ explicit SparseQR(const MatrixType& mat) : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
{
compute(mat);
}
@@ -162,7 +166,7 @@ class SparseQR
/** \internal */
template<typename Rhs, typename Dest>
- bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
+ bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
@@ -178,7 +182,7 @@ class SparseQR
y.resize((std::max)(cols(),Index(y.rows())),y.cols());
y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));
y.bottomRows(y.rows()-rank).setZero();
-
+
// Apply the column permutation
if (m_perm_c.size()) dest = colsPermutation() * y.topRows(cols());
else dest = y.topRows(cols());
@@ -186,7 +190,6 @@ class SparseQR
m_info = Success;
return true;
}
-
/** Sets the threshold that is used to determine linearly dependent columns during the factorization.
*
@@ -204,18 +207,18 @@ class SparseQR
* \sa compute()
*/
template<typename Rhs>
- inline const internal::solve_retval<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const
+ inline const Solve<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
- return internal::solve_retval<SparseQR, Rhs>(*this, B.derived());
+ return Solve<SparseQR, Rhs>(*this, B.derived());
}
template<typename Rhs>
- inline const internal::sparse_solve_retval<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
+ inline const Solve<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
- return internal::sparse_solve_retval<SparseQR, Rhs>(*this, B.derived());
+ return Solve<SparseQR, Rhs>(*this, B.derived());
}
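A minimal least-squares sketch for these overloads (A may be rectangular and must be in compressed mode before factorization; setup of A and b is assumed):

#include <Eigen/Sparse>

// Returns the least-squares solution of min |A*x - b|.
Eigen::VectorXd sparseLeastSquares(Eigen::SparseMatrix<double> A,
                                   const Eigen::VectorXd& b)
{
  A.makeCompressed(); // SparseQR requires compressed storage
  Eigen::SparseQR<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > qr(A);
  return qr.solve(b);
}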
/** \brief Reports whether previous computation was successful.
@@ -244,7 +247,6 @@ class SparseQR
protected:
- bool m_isInitialized;
bool m_analysisIsok;
bool m_factorizationIsok;
mutable ComputationInfo m_info;
@@ -282,9 +284,11 @@ template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
{
eigen_assert(mat.isCompressed() && "SparseQR requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to SparseQR");
+ // Copy to a column-major matrix if the input is row-major
+ typename internal::conditional<MatrixType::IsRowMajor,QRMatrixType,const MatrixType&>::type matCpy(mat);
// Compute the column fill reducing ordering
OrderingType ord;
- ord(mat, m_perm_c);
+ ord(matCpy, m_perm_c);
Index n = mat.cols();
Index m = mat.rows();
Index diagSize = (std::min)(m,n);
@@ -297,7 +301,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
// Compute the column elimination tree of the permuted matrix
m_outputPerm_c = m_perm_c.inverse();
- internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+ internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
m_isEtreeOk = true;
m_R.resize(m, n);
@@ -321,7 +325,6 @@ template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
{
using std::abs;
- using std::max;
eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step");
Index m = mat.rows();
@@ -335,21 +338,35 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
m_R.setZero();
m_Q.setZero();
+ m_pmat = mat;
if(!m_isEtreeOk)
{
m_outputPerm_c = m_perm_c.inverse();
- internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+ internal::coletree(m_pmat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
m_isEtreeOk = true;
}
-
- m_pmat = mat;
+
m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
+
// Apply the fill-in reducing permutation lazily:
- for (int i = 0; i < n; i++)
{
- Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
- m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i];
- m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ // If the input is row-major, copy the original column indices,
+ // otherwise directly use the input matrix
+ //
+ IndexVector originalOuterIndicesCpy;
+ const Index *originalOuterIndices = mat.outerIndexPtr();
+ if(MatrixType::IsRowMajor)
+ {
+ originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1);
+ originalOuterIndices = originalOuterIndicesCpy.data();
+ }
+
+ for (int i = 0; i < n; i++)
+ {
+ Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
+ m_pmat.outerIndexPtr()[p] = originalOuterIndices[i];
+ m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i];
+ }
}
/* Compute the default threshold as in MATLAB, see:
@@ -359,7 +376,9 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
if(m_useDefaultThreshold)
{
RealScalar max2Norm = 0.0;
- for (int j = 0; j < n; j++) max2Norm = (max)(max2Norm, m_pmat.col(j).norm());
+ for (int j = 0; j < n; j++) max2Norm = numext::maxi(max2Norm, m_pmat.col(j).norm());
+ if(max2Norm==RealScalar(0))
+ max2Norm = RealScalar(1);
pivotThreshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();
}
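Restated as a standalone sketch (function name hypothetical), including the new guard that keeps the threshold positive for an all-zero matrix:

#include <limits>

// Default rank-revealing pivot threshold, following the MATLAB-style
// heuristic above: 20 * (m + n) * max column norm * machine epsilon.
double defaultPivotThreshold(int m, int n, double max2Norm)
{
  if (max2Norm == 0.0) max2Norm = 1.0; // guard: all-zero input matrix
  return 20.0 * (m + n) * max2Norm * std::numeric_limits<double>::epsilon();
}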
@@ -368,7 +387,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
Index nonzeroCol = 0; // Record the number of valid pivots
m_Q.startVec(0);
-
+
// Left looking rank-revealing QR factorization: compute a column of R and Q at a time
for (Index col = 0; col < n; ++col)
{
@@ -384,7 +403,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
// all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.
// Note: if the diagonal entry does not exist, then its contribution must be explicitly added,
// hence the found_diag trick, which allows one extra iteration for the diagonal element when it has not been found.
- for (typename MatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
+ for (typename QRMatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
{
Index curIdx = nonzeroCol;
if(itp) curIdx = itp.row();
@@ -536,13 +555,13 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
m_R.finalize();
m_R.makeCompressed();
m_isQSorted = false;
-
+
m_nonzeropivots = nonzeroCol;
if(nonzeroCol<n)
{
// Permute the triangular factor to put the 'dead' columns to the end
- MatrixType tempR(m_R);
+ QRMatrixType tempR(m_R);
m_R = tempR * m_pivotperm;
// Update the column permutation
@@ -554,34 +573,6 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
m_info = Success;
}
-namespace internal {
-
-template<typename _MatrixType, typename OrderingType, typename Rhs>
-struct solve_retval<SparseQR<_MatrixType,OrderingType>, Rhs>
- : solve_retval_base<SparseQR<_MatrixType,OrderingType>, Rhs>
-{
- typedef SparseQR<_MatrixType,OrderingType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-template<typename _MatrixType, typename OrderingType, typename Rhs>
-struct sparse_solve_retval<SparseQR<_MatrixType, OrderingType>, Rhs>
- : sparse_solve_retval_base<SparseQR<_MatrixType, OrderingType>, Rhs>
-{
- typedef SparseQR<_MatrixType, OrderingType> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec, Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-} // end namespace internal
-
template <typename SparseQRType, typename Derived>
struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived> >
{
@@ -646,7 +637,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
typedef typename SparseQRType::Index Index;
typedef typename SparseQRType::Scalar Scalar;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
- SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {}
+ explicit SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {}
template<typename Derived>
SparseQR_QProduct<SparseQRType, Derived> operator*(const MatrixBase<Derived>& other)
{
@@ -682,7 +673,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
template<typename SparseQRType>
struct SparseQRMatrixQTransposeReturnType
{
- SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {}
+ explicit SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {}
template<typename Derived>
SparseQR_QProduct<SparseQRType,Derived> operator*(const MatrixBase<Derived>& other)
{
diff --git a/Eigen/src/StlSupport/StdDeque.h b/Eigen/src/StlSupport/StdDeque.h
index aaf66330b..909600476 100644
--- a/Eigen/src/StlSupport/StdDeque.h
+++ b/Eigen/src/StlSupport/StdDeque.h
@@ -14,7 +14,7 @@
#include "details.h"
// Define the explicit instantiation (e.g. necessary for the Intel compiler)
-#if defined(__INTEL_COMPILER) || defined(__GNUC__)
+#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...)
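For reference, a usage sketch of the companion user-facing macro from the same header, which invokes the explicit instantiation above (assuming the macro name EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION as in StdDeque.h):

#include <Eigen/Core>
#include <Eigen/StdDeque>
#include <deque>

// Specializes std::deque for a fixed-size vectorizable type so the
// aligned allocator is picked up automatically.
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Eigen::Matrix4d)

std::deque<Eigen::Matrix4d> frames; // safe despite alignment requirements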
diff --git a/Eigen/src/StlSupport/StdList.h b/Eigen/src/StlSupport/StdList.h
index 3c742430c..265b46f8e 100644
--- a/Eigen/src/StlSupport/StdList.h
+++ b/Eigen/src/StlSupport/StdList.h
@@ -13,7 +13,7 @@
#include "details.h"
// Define the explicit instantiation (e.g. necessary for the Intel compiler)
-#if defined(__INTEL_COMPILER) || defined(__GNUC__)
+#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...)
diff --git a/Eigen/src/StlSupport/details.h b/Eigen/src/StlSupport/details.h
index d8debc7c4..e42ec024f 100644
--- a/Eigen/src/StlSupport/details.h
+++ b/Eigen/src/StlSupport/details.h
@@ -46,7 +46,7 @@ namespace Eigen {
~aligned_allocator_indirection() {}
};
-#ifdef _MSC_VER
+#if EIGEN_COMP_MSVC
// sometimes, MSVC detects, at compile time, that the argument x
// in std::vector::resize(size_t s,T x) won't be aligned and generate an error
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index bcb355760..6de5b3dc5 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -288,8 +288,12 @@ MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
* \brief The base class for the direct and incomplete LU factorization of SuperLU
*/
template<typename _MatrixType, typename Derived>
-class SuperLUBase : internal::noncopyable
+class SuperLUBase : public SparseSolverBase<Derived>
{
+ protected:
+ typedef SparseSolverBase<Derived> Base;
+ using Base::derived;
+ using Base::m_isInitialized;
public:
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
@@ -309,9 +313,6 @@ class SuperLUBase : internal::noncopyable
clearFactors();
}
- Derived& derived() { return *static_cast<Derived*>(this); }
- const Derived& derived() const { return *static_cast<const Derived*>(this); }
-
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
@@ -335,33 +336,7 @@ class SuperLUBase : internal::noncopyable
derived().analyzePattern(matrix);
derived().factorize(matrix);
}
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<SuperLUBase, Rhs> solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "SuperLU is not initialized.");
- eigen_assert(rows()==b.rows()
- && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<SuperLUBase, Rhs>(*this, b.derived());
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<SuperLUBase, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "SuperLU is not initialized.");
- eigen_assert(rows()==b.rows()
- && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<SuperLUBase, Rhs>(*this, b.derived());
- }
-
+
/** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
*
* This function is particularly useful when solving several problems having the same structure.
@@ -453,7 +428,6 @@ class SuperLUBase : internal::noncopyable
mutable char m_sluEqued;
mutable ComputationInfo m_info;
- bool m_isInitialized;
int m_factorizationIsOk;
int m_analysisIsOk;
mutable bool m_extractedDataAreDirty;
@@ -491,10 +465,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
typedef TriangularView<LUMatrixType, Upper> UMatrixType;
public:
+ using Base::_solve_impl;
SuperLU() : Base() { init(); }
- SuperLU(const MatrixType& matrix) : Base()
+ explicit SuperLU(const MatrixType& matrix) : Base()
{
init();
Base::compute(matrix);
@@ -528,7 +503,7 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
#endif // EIGEN_PARSED_BY_DOXYGEN
inline const LMatrixType& matrixL() const
@@ -637,7 +612,7 @@ void SuperLU<MatrixType>::factorize(const MatrixType& a)
template<typename MatrixType>
template<typename Rhs,typename Dest>
-void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
+void SuperLU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
{
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()");
@@ -652,8 +627,12 @@ void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x)
m_sluFerr.resize(rhsCols);
m_sluBerr.resize(rhsCols);
- m_sluB = SluMatrix::Map(b.const_cast_derived());
- m_sluX = SluMatrix::Map(x.derived());
+
+ Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b);
+ Ref<const Matrix<typename Dest::Scalar,Dynamic,Dynamic,ColMajor> > x_ref(x);
+
+ m_sluB = SluMatrix::Map(b_ref.const_cast_derived());
+ m_sluX = SluMatrix::Map(x_ref.const_cast_derived());
typename Rhs::PlainObject b_cpy;
if(m_sluEqued!='N')
@@ -676,6 +655,10 @@ void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x)
&m_sluFerr[0], &m_sluBerr[0],
&m_sluStat, &info, Scalar());
StatFree(&m_sluStat);
+
+ if(&x.coeffRef(0) != x_ref.data())
+ x = x_ref;
+
m_info = info==0 ? Success : NumericalIssue;
}
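The Ref trick above in isolation: binding is copy-free when the argument already has the requested column-major, contiguous layout, and goes through a temporary otherwise, which is why x is copied back when the addresses differ. A sketch:

#include <Eigen/Dense>
#include <cassert>

void refLayoutSketch()
{
  typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> ColMat;
  Eigen::MatrixXd cm(4, 4);                        // column-major storage
  Eigen::Matrix<double, 4, 4, Eigen::RowMajor> rm; // row-major storage
  Eigen::Ref<const ColMat> r1(cm);
  Eigen::Ref<const ColMat> r2(rm);
  assert(r1.data() == cm.data()); // layout matches: no copy
  assert(r2.data() != rm.data()); // layout differs: copied to a temporary
}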
@@ -828,6 +811,7 @@ class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> >
typedef typename Base::Index Index;
public:
+ using Base::_solve_impl;
SuperILU() : Base() { init(); }
@@ -863,7 +847,7 @@ class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> >
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
#endif // EIGEN_PARSED_BY_DOXYGEN
protected:
@@ -948,7 +932,7 @@ void SuperILU<MatrixType>::factorize(const MatrixType& a)
template<typename MatrixType>
template<typename Rhs,typename Dest>
-void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
+void SuperILU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
{
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()");
@@ -962,8 +946,12 @@ void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x)
m_sluFerr.resize(rhsCols);
m_sluBerr.resize(rhsCols);
- m_sluB = SluMatrix::Map(b.const_cast_derived());
- m_sluX = SluMatrix::Map(x.derived());
+
+ Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b);
+ Ref<const Matrix<typename Dest::Scalar,Dynamic,Dynamic,ColMajor> > x_ref(x);
+
+ m_sluB = SluMatrix::Map(b_ref.const_cast_derived());
+ m_sluX = SluMatrix::Map(x_ref.const_cast_derived());
typename Rhs::PlainObject b_cpy;
if(m_sluEqued!='N')
@@ -986,41 +974,14 @@ void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x)
&recip_pivot_growth, &rcond,
&m_sluStat, &info, Scalar());
StatFree(&m_sluStat);
+
+ if(&x.coeffRef(0) != x_ref.data())
+ x = x_ref;
m_info = info==0 ? Success : NumericalIssue;
}
#endif
-namespace internal {
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs>
- : solve_retval_base<SuperLUBase<_MatrixType,Derived>, Rhs>
-{
- typedef SuperLUBase<_MatrixType,Derived> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec().derived()._solve(rhs(),dst);
- }
-};
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct sparse_solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs>
- : sparse_solve_retval_base<SuperLUBase<_MatrixType,Derived>, Rhs>
-{
- typedef SuperLUBase<_MatrixType,Derived> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_SUPERLUSUPPORT_H
diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h
index 3a48cecf7..a2bb75b09 100644
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -107,6 +107,16 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N
return umfpack_zi_get_determinant(&mx_real,0,Ex,NumericHandle,User_Info);
}
+namespace internal {
+ template<typename T> struct umfpack_helper_is_sparse_plain : false_type {};
+ template<typename Scalar, int Options, typename StorageIndex>
+ struct umfpack_helper_is_sparse_plain<SparseMatrix<Scalar,Options,StorageIndex> >
+ : true_type {};
+ template<typename Scalar, int Options, typename StorageIndex>
+ struct umfpack_helper_is_sparse_plain<MappedSparseMatrix<Scalar,Options,StorageIndex> >
+ : true_type {};
+}
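A standalone sketch of how such a true_type/false_type trait drives overload selection (hypothetical names; the real consumer is grapInput_impl further below):

#include <type_traits>

template<typename T> struct is_plain_storage : std::false_type {};
template<> struct is_plain_storage<double> : std::true_type {};

// The tag argument picks the overload at compile time.
template<typename T> void ingest(const T&, std::true_type)  { /* borrow the buffers */ }
template<typename T> void ingest(const T&, std::false_type) { /* copy, then borrow */ }
template<typename T> void ingest(const T& t) { ingest(t, is_plain_storage<T>()); }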
+
/** \ingroup UmfPackSupport_Module
* \brief A sparse LU factorization and solver based on UmfPack
*
@@ -121,9 +131,13 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N
* \sa \ref TutorialSparseDirectSolvers
*/
template<typename _MatrixType>
-class UmfPackLU : internal::noncopyable
+class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
{
+ protected:
+ typedef SparseSolverBase<UmfPackLU<_MatrixType> > Base;
+ using Base::m_isInitialized;
public:
+ using Base::_solve_impl;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
@@ -138,7 +152,7 @@ class UmfPackLU : internal::noncopyable
UmfPackLU() { init(); }
- UmfPackLU(const MatrixType& matrix)
+ explicit UmfPackLU(const MatrixType& matrix)
{
init();
compute(matrix);
@@ -192,36 +206,14 @@ class UmfPackLU : internal::noncopyable
* Note that the matrix should be column-major, and in compressed format for best performance.
* \sa SparseMatrix::makeCompressed().
*/
- void compute(const MatrixType& matrix)
- {
- analyzePattern(matrix);
- factorize(matrix);
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::solve_retval<UmfPackLU, Rhs> solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "UmfPackLU is not initialized.");
- eigen_assert(rows()==b.rows()
- && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<UmfPackLU, Rhs>(*this, b.derived());
- }
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
- *
- * \sa compute()
- */
- template<typename Rhs>
- inline const internal::sparse_solve_retval<UmfPackLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
+ template<typename InputMatrixType>
+ void compute(const InputMatrixType& matrix)
{
- eigen_assert(m_isInitialized && "UmfPackLU is not initialized.");
- eigen_assert(rows()==b.rows()
- && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b");
- return internal::sparse_solve_retval<UmfPackLU, Rhs>(*this, b.derived());
+ if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar());
+ if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar());
+ grapInput(matrix.derived());
+ analyzePattern_impl();
+ factorize_impl();
}
/** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
@@ -230,23 +222,15 @@ class UmfPackLU : internal::noncopyable
*
* \sa factorize(), compute()
*/
- void analyzePattern(const MatrixType& matrix)
+ template<typename InputMatrixType>
+ void analyzePattern(const InputMatrixType& matrix)
{
- if(m_symbolic)
- umfpack_free_symbolic(&m_symbolic,Scalar());
- if(m_numeric)
- umfpack_free_numeric(&m_numeric,Scalar());
+ if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar());
+ if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar());
- grapInput(matrix);
+ grapInput(matrix.derived());
- int errorCode = 0;
- errorCode = umfpack_symbolic(matrix.rows(), matrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
- &m_symbolic, 0, 0);
-
- m_isInitialized = true;
- m_info = errorCode ? InvalidInput : Success;
- m_analysisIsOk = true;
- m_factorizationIsOk = false;
+ analyzePattern_impl();
}
/** Performs a numeric decomposition of \a matrix
@@ -255,26 +239,22 @@ class UmfPackLU : internal::noncopyable
*
* \sa analyzePattern(), compute()
*/
- void factorize(const MatrixType& matrix)
+ template<typename InputMatrixType>
+ void factorize(const InputMatrixType& matrix)
{
eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()");
if(m_numeric)
umfpack_free_numeric(&m_numeric,Scalar());
- grapInput(matrix);
-
- int errorCode;
- errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
- m_symbolic, &m_numeric, 0, 0);
-
- m_info = errorCode ? NumericalIssue : Success;
- m_factorizationIsOk = true;
+ grapInput(matrix.derived());
+
+ factorize_impl();
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal */
template<typename BDerived,typename XDerived>
- bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
+ bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
#endif
Scalar determinant() const;
@@ -283,19 +263,20 @@ class UmfPackLU : internal::noncopyable
protected:
-
void init()
{
- m_info = InvalidInput;
- m_isInitialized = false;
- m_numeric = 0;
- m_symbolic = 0;
- m_outerIndexPtr = 0;
- m_innerIndexPtr = 0;
- m_valuePtr = 0;
+ m_info = InvalidInput;
+ m_isInitialized = false;
+ m_numeric = 0;
+ m_symbolic = 0;
+ m_outerIndexPtr = 0;
+ m_innerIndexPtr = 0;
+ m_valuePtr = 0;
+ m_extractedDataAreDirty = true;
}
- void grapInput(const MatrixType& mat)
+ template<typename InputMatrixType>
+ void grapInput_impl(const InputMatrixType& mat, internal::true_type)
{
m_copyMatrix.resize(mat.rows(), mat.cols());
if( ((MatrixType::Flags&RowMajorBit)==RowMajorBit) || sizeof(typename MatrixType::Index)!=sizeof(int) || !mat.isCompressed() )
@@ -313,6 +294,45 @@ class UmfPackLU : internal::noncopyable
m_valuePtr = mat.valuePtr();
}
}
+
+ template<typename InputMatrixType>
+ void grapInput_impl(const InputMatrixType& mat, internal::false_type)
+ {
+ m_copyMatrix = mat;
+ m_outerIndexPtr = m_copyMatrix.outerIndexPtr();
+ m_innerIndexPtr = m_copyMatrix.innerIndexPtr();
+ m_valuePtr = m_copyMatrix.valuePtr();
+ }
+
+ template<typename InputMatrixType>
+ void grapInput(const InputMatrixType& mat)
+ {
+ grapInput_impl(mat, internal::umfpack_helper_is_sparse_plain<InputMatrixType>());
+ }
+
+ void analyzePattern_impl()
+ {
+ int errorCode = 0;
+ errorCode = umfpack_symbolic(m_copyMatrix.rows(), m_copyMatrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
+ &m_symbolic, 0, 0);
+
+ m_isInitialized = true;
+ m_info = errorCode ? InvalidInput : Success;
+ m_analysisIsOk = true;
+ m_factorizationIsOk = false;
+ m_extractedDataAreDirty = true;
+ }
+
+ void factorize_impl()
+ {
+ int errorCode;
+ errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
+ m_symbolic, &m_numeric, 0, 0);
+
+ m_info = errorCode ? NumericalIssue : Success;
+ m_factorizationIsOk = true;
+ m_extractedDataAreDirty = true;
+ }
// cached data to reduce reallocation, etc.
mutable LUMatrixType m_l;
@@ -328,7 +348,6 @@ class UmfPackLU : internal::noncopyable
void* m_symbolic;
mutable ComputationInfo m_info;
- bool m_isInitialized;
int m_factorizationIsOk;
int m_analysisIsOk;
mutable bool m_extractedDataAreDirty;
@@ -376,7 +395,7 @@ typename UmfPackLU<MatrixType>::Scalar UmfPackLU<MatrixType>::determinant() cons
template<typename MatrixType>
template<typename BDerived,typename XDerived>
-bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const
+bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const
{
const int rhsCols = b.cols();
eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet");
@@ -384,11 +403,22 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe
eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve");
int errorCode;
+ Scalar* x_ptr = 0;
+ Matrix<Scalar,Dynamic,1> x_tmp;
+ if(x.innerStride()!=1)
+ {
+ x_tmp.resize(x.rows());
+ x_ptr = x_tmp.data();
+ }
for (int j=0; j<rhsCols; ++j)
{
+ if(x.innerStride()==1)
+ x_ptr = &x.col(j).coeffRef(0);
errorCode = umfpack_solve(UMFPACK_A,
m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
- &x.col(j).coeffRef(0), &b.const_cast_derived().col(j).coeffRef(0), m_numeric, 0, 0);
+ x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, 0, 0);
+ if(x.innerStride()!=1)
+ x.col(j) = x_tmp;
if (errorCode!=0)
return false;
}
@@ -396,37 +426,6 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe
return true;
}
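Why the stride guard matters, in isolation: UMFPACK needs a contiguous array per right-hand-side column, and innerStride()==1 is exactly that condition. A sketch:

#include <Eigen/Dense>
#include <cassert>

void strideSketch()
{
  Eigen::MatrixXd M(3, 3);
  assert(M.col(0).innerStride() == 1); // contiguous: passed to UMFPACK directly
  assert(M.row(0).innerStride() == 3); // strided: solved into x_tmp, then copied
}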
-
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<UmfPackLU<_MatrixType>, Rhs>
- : solve_retval_base<UmfPackLU<_MatrixType>, Rhs>
-{
- typedef UmfPackLU<_MatrixType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-template<typename _MatrixType, typename Rhs>
-struct sparse_solve_retval<UmfPackLU<_MatrixType>, Rhs>
- : sparse_solve_retval_base<UmfPackLU<_MatrixType>, Rhs>
-{
- typedef UmfPackLU<_MatrixType> Dec;
- EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- this->defaultEvalTo(dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_UMFPACKSUPPORT_H
diff --git a/Eigen/src/misc/Kernel.h b/Eigen/src/misc/Kernel.h
index b9e1518fd..4b03e44c1 100644
--- a/Eigen/src/misc/Kernel.h
+++ b/Eigen/src/misc/Kernel.h
@@ -41,7 +41,7 @@ template<typename _DecompositionType> struct kernel_retval_base
typedef ReturnByValue<kernel_retval_base> Base;
typedef typename Base::Index Index;
- kernel_retval_base(const DecompositionType& dec)
+ explicit kernel_retval_base(const DecompositionType& dec)
: m_dec(dec),
m_rank(dec.rank()),
m_cols(m_rank==dec.cols() ? 1 : dec.cols() - m_rank)
diff --git a/Eigen/src/misc/Solve.h b/Eigen/src/misc/Solve.h
deleted file mode 100644
index 7f70d60af..000000000
--- a/Eigen/src/misc/Solve.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_MISC_SOLVE_H
-#define EIGEN_MISC_SOLVE_H
-
-namespace Eigen {
-
-namespace internal {
-
-/** \class solve_retval_base
- *
- */
-template<typename DecompositionType, typename Rhs>
-struct traits<solve_retval_base<DecompositionType, Rhs> >
-{
- typedef typename DecompositionType::MatrixType MatrixType;
- typedef Matrix<typename Rhs::Scalar,
- MatrixType::ColsAtCompileTime,
- Rhs::ColsAtCompileTime,
- Rhs::PlainObject::Options,
- MatrixType::MaxColsAtCompileTime,
- Rhs::MaxColsAtCompileTime> ReturnType;
-};
-
-template<typename _DecompositionType, typename Rhs> struct solve_retval_base
- : public ReturnByValue<solve_retval_base<_DecompositionType, Rhs> >
-{
- typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
- typedef _DecompositionType DecompositionType;
- typedef ReturnByValue<solve_retval_base> Base;
- typedef typename Base::Index Index;
-
- solve_retval_base(const DecompositionType& dec, const Rhs& rhs)
- : m_dec(dec), m_rhs(rhs)
- {}
-
- inline Index rows() const { return m_dec.cols(); }
- inline Index cols() const { return m_rhs.cols(); }
- inline const DecompositionType& dec() const { return m_dec; }
- inline const RhsNestedCleaned& rhs() const { return m_rhs; }
-
- template<typename Dest> inline void evalTo(Dest& dst) const
- {
- static_cast<const solve_retval<DecompositionType,Rhs>*>(this)->evalTo(dst);
- }
-
- protected:
- const DecompositionType& m_dec;
- typename Rhs::Nested m_rhs;
-};
-
-} // end namespace internal
-
-#define EIGEN_MAKE_SOLVE_HELPERS(DecompositionType,Rhs) \
- typedef typename DecompositionType::MatrixType MatrixType; \
- typedef typename MatrixType::Scalar Scalar; \
- typedef typename MatrixType::RealScalar RealScalar; \
- typedef typename MatrixType::Index Index; \
- typedef Eigen::internal::solve_retval_base<DecompositionType,Rhs> Base; \
- using Base::dec; \
- using Base::rhs; \
- using Base::rows; \
- using Base::cols; \
- solve_retval(const DecompositionType& dec, const Rhs& rhs) \
- : Base(dec, rhs) {}
-
-} // end namespace Eigen
-
-#endif // EIGEN_MISC_SOLVE_H
diff --git a/Eigen/src/misc/SparseSolve.h b/Eigen/src/misc/SparseSolve.h
deleted file mode 100644
index 05caa9266..000000000
--- a/Eigen/src/misc/SparseSolve.h
+++ /dev/null
@@ -1,130 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_SPARSE_SOLVE_H
-#define EIGEN_SPARSE_SOLVE_H
-
-namespace Eigen {
-
-namespace internal {
-
-template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_base;
-template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval;
-
-template<typename DecompositionType, typename Rhs>
-struct traits<sparse_solve_retval_base<DecompositionType, Rhs> >
-{
- typedef typename DecompositionType::MatrixType MatrixType;
- typedef SparseMatrix<typename Rhs::Scalar, Rhs::Options, typename Rhs::Index> ReturnType;
-};
-
-template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_base
- : public ReturnByValue<sparse_solve_retval_base<_DecompositionType, Rhs> >
-{
- typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
- typedef _DecompositionType DecompositionType;
- typedef ReturnByValue<sparse_solve_retval_base> Base;
- typedef typename Base::Index Index;
-
- sparse_solve_retval_base(const DecompositionType& dec, const Rhs& rhs)
- : m_dec(dec), m_rhs(rhs)
- {}
-
- inline Index rows() const { return m_dec.cols(); }
- inline Index cols() const { return m_rhs.cols(); }
- inline const DecompositionType& dec() const { return m_dec; }
- inline const RhsNestedCleaned& rhs() const { return m_rhs; }
-
- template<typename Dest> inline void evalTo(Dest& dst) const
- {
- static_cast<const sparse_solve_retval<DecompositionType,Rhs>*>(this)->evalTo(dst);
- }
-
- protected:
- template<typename DestScalar, int DestOptions, typename DestIndex>
- inline void defaultEvalTo(SparseMatrix<DestScalar,DestOptions,DestIndex>& dst) const
- {
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 4;
- int rhsCols = m_rhs.cols();
- int size = m_rhs.rows();
- // the temporary matrices do not need more columns than NbColsAtOnce:
- int tmpCols = (std::min)(rhsCols, NbColsAtOnce);
- Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);
- Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols);
- tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols));
- dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();
- }
- }
- const DecompositionType& m_dec;
- typename Rhs::Nested m_rhs;
-};
-
-#define EIGEN_MAKE_SPARSE_SOLVE_HELPERS(DecompositionType,Rhs) \
- typedef typename DecompositionType::MatrixType MatrixType; \
- typedef typename MatrixType::Scalar Scalar; \
- typedef typename MatrixType::RealScalar RealScalar; \
- typedef typename MatrixType::Index Index; \
- typedef Eigen::internal::sparse_solve_retval_base<DecompositionType,Rhs> Base; \
- using Base::dec; \
- using Base::rhs; \
- using Base::rows; \
- using Base::cols; \
- sparse_solve_retval(const DecompositionType& dec, const Rhs& rhs) \
- : Base(dec, rhs) {}
-
-
-
-template<typename DecompositionType, typename Rhs, typename Guess> struct solve_retval_with_guess;
-
-template<typename DecompositionType, typename Rhs, typename Guess>
-struct traits<solve_retval_with_guess<DecompositionType, Rhs, Guess> >
-{
- typedef typename DecompositionType::MatrixType MatrixType;
- typedef Matrix<typename Rhs::Scalar,
- MatrixType::ColsAtCompileTime,
- Rhs::ColsAtCompileTime,
- Rhs::PlainObject::Options,
- MatrixType::MaxColsAtCompileTime,
- Rhs::MaxColsAtCompileTime> ReturnType;
-};
-
-template<typename DecompositionType, typename Rhs, typename Guess> struct solve_retval_with_guess
- : public ReturnByValue<solve_retval_with_guess<DecompositionType, Rhs, Guess> >
-{
- typedef typename DecompositionType::Index Index;
-
- solve_retval_with_guess(const DecompositionType& dec, const Rhs& rhs, const Guess& guess)
- : m_dec(dec), m_rhs(rhs), m_guess(guess)
- {}
-
- inline Index rows() const { return m_dec.cols(); }
- inline Index cols() const { return m_rhs.cols(); }
-
- template<typename Dest> inline void evalTo(Dest& dst) const
- {
- dst = m_guess;
- m_dec._solveWithGuess(m_rhs,dst);
- }
-
- protected:
- const DecompositionType& m_dec;
- const typename Rhs::Nested m_rhs;
- const typename Guess::Nested m_guess;
-};
-
-} // namepsace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_SPARSE_SOLVE_H
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index f6d7d8944..f6f526d2b 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -1,5 +1,22 @@
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType;
+
+typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;
+typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;
+typedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType;
+typedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType;
+typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturnType;
+typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
+typedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType;
+typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
+typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType;
+typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
+typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
+
/** \returns an expression of the coefficient-wise absolute value of \c *this
*
* Example: \include Cwise_abs.cpp
@@ -8,10 +25,10 @@
* \sa abs2()
*/
EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
+EIGEN_STRONG_INLINE const AbsReturnType
abs() const
{
- return derived();
+ return AbsReturnType(derived());
}
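These named return types do not change user code; expressions stay lazy. A small sketch:

#include <Eigen/Dense>

// Builds a compound expression from AbsReturnType, SqrtReturnType and
// SquareReturnType; evaluation happens on assignment/return.
Eigen::ArrayXd cwiseDemo(const Eigen::ArrayXd& a)
{
  return a.abs().sqrt() + a.square();
}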
/** \returns an expression of the coefficient-wise squared absolute value of \c *this
@@ -22,10 +39,10 @@ abs() const
* \sa abs(), square()
*/
EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived>
+EIGEN_STRONG_INLINE const Abs2ReturnType
abs2() const
{
- return derived();
+ return Abs2ReturnType(derived());
}
/** \returns an expression of the coefficient-wise exponential of *this.
@@ -39,10 +56,10 @@ abs2() const
* \sa pow(), log(), sin(), cos()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived>
+inline const ExpReturnType
exp() const
{
- return derived();
+ return ExpReturnType(derived());
}
/** \returns an expression of the coefficient-wise logarithm of *this.
@@ -56,10 +73,10 @@ exp() const
* \sa exp()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived>
+inline const LogReturnType
log() const
{
- return derived();
+ return LogReturnType(derived());
}
/** \returns an expression of the coefficient-wise square root of *this.
@@ -73,10 +90,10 @@ log() const
* \sa pow(), square()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived>
+inline const SqrtReturnType
sqrt() const
{
- return derived();
+ return SqrtReturnType(derived());
}
/** \returns an expression of the coefficient-wise cosine of *this.
@@ -90,10 +107,10 @@ sqrt() const
* \sa sin(), acos()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived>
+inline const CosReturnType
cos() const
{
- return derived();
+ return CosReturnType(derived());
}
@@ -108,10 +125,10 @@ cos() const
* \sa cos(), asin()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived>
+inline const SinReturnType
sin() const
{
- return derived();
+ return SinReturnType(derived());
}
/** \returns an expression of the coefficient-wise arc cosine of *this.
@@ -122,10 +139,10 @@ sin() const
* \sa cos(), asin()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived>
+inline const AcosReturnType
acos() const
{
- return derived();
+ return AcosReturnType(derived());
}
/** \returns an expression of the coefficient-wise arc sine of *this.
@@ -136,10 +153,10 @@ acos() const
* \sa sin(), acos()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived>
+inline const AsinReturnType
asin() const
{
- return derived();
+ return AsinReturnType(derived());
}
/** \returns an expression of the coefficient-wise tan of *this.
@@ -150,10 +167,10 @@ asin() const
* \sa cos(), sin()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_tan_op<Scalar>, Derived>
+inline const TanReturnType
tan() const
{
- return derived();
+ return TanReturnType(derived());
}
/** \returns an expression of the coefficient-wise arc tan of *this.
@@ -163,10 +180,10 @@ tan() const
*
* \sa cos(), sin(), tan()
*/
-inline const CwiseUnaryOp<internal::scalar_atan_op<Scalar>, Derived>
+inline const AtanReturnType
atan() const
{
- return derived();
+ return AtanReturnType(derived());
}
/** \returns an expression of the coefficient-wise power of *this to the given exponent.
@@ -180,11 +197,10 @@ atan() const
* \sa exp(), log()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived>
+inline const PowReturnType
pow(const Scalar& exponent) const
{
- return CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived>
- (derived(), internal::scalar_pow_op<Scalar>(exponent));
+ return PowReturnType(derived(), internal::scalar_pow_op<Scalar>(exponent));
}
@@ -196,10 +212,10 @@ pow(const Scalar& exponent) const
* \sa operator/(), operator*()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived>
+inline const InverseReturnType
inverse() const
{
- return derived();
+ return InverseReturnType(derived());
}
/** \returns an expression of the coefficient-wise square of *this.
@@ -210,10 +226,10 @@ inverse() const
* \sa operator/(), operator*(), abs2()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived>
+inline const SquareReturnType
square() const
{
- return derived();
+ return SquareReturnType(derived());
}
/** \returns an expression of the coefficient-wise cube of *this.
@@ -224,10 +240,10 @@ square() const
* \sa square(), pow()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived>
+inline const CubeReturnType
cube() const
{
- return derived();
+ return CubeReturnType(derived());
}
#define EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(METHOD_NAME,FUNCTOR) \
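The hunks above only introduce named typedefs (AbsReturnType, SqrtReturnType, ...) for the returned expression types; evaluation is unchanged. A minimal sketch of the unchanged user-facing behaviour, assuming Eigen 3 headers (the program itself is illustrative, not part of the patch):

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::ArrayXd a(3);
  a << -1.0, 4.0, 9.0;
  // abs() and sqrt() still return lightweight expression objects;
  // only their spelled-out types now go through the new typedefs.
  Eigen::ArrayXd b = a.abs().sqrt();
  std::cout << b.transpose() << std::endl;  // prints: 1 2 3
  return 0;
}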
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h
index a17153e64..050bce03c 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -14,6 +14,8 @@
/** \internal Represents a scalar multiple of an expression */
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived> ScalarMultipleReturnType;
+typedef CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived> ScalarComplexMultipleReturnType;
+
/** \internal Represents a quotient of an expression by a scalar*/
typedef CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived> ScalarQuotient1ReturnType;
/** \internal the return type of conjugate() */
@@ -36,13 +38,16 @@ typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturn
/** \internal the return type of imag() */
typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
+typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
+//typedef CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived>
+
#endif // not EIGEN_PARSED_BY_DOXYGEN
/** \returns an expression of the opposite of \c *this
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_opposite_op<typename internal::traits<Derived>::Scalar>, const Derived>
-operator-() const { return derived(); }
+inline const NegativeReturnType
+operator-() const { return NegativeReturnType(derived()); }
/** \returns an expression of \c *this scaled by the scalar factor \a scalar */
@@ -50,8 +55,7 @@ EIGEN_DEVICE_FUNC
inline const ScalarMultipleReturnType
operator*(const Scalar& scalar) const
{
- return CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived>
- (derived(), internal::scalar_multiple_op<Scalar>(scalar));
+ return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op<Scalar>(scalar));
}
#ifdef EIGEN_PARSED_BY_DOXYGEN
@@ -60,20 +64,18 @@ const ScalarMultipleReturnType operator*(const RealScalar& scalar) const;
/** \returns an expression of \c *this divided by the scalar value \a scalar */
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>, const Derived>
+inline const ScalarQuotient1ReturnType
operator/(const Scalar& scalar) const
{
- return CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived>
- (derived(), internal::scalar_quotient1_op<Scalar>(scalar));
+ return ScalarQuotient1ReturnType(derived(), internal::scalar_quotient1_op<Scalar>(scalar));
}
/** Overloaded for efficient real matrix times complex scalar value */
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived>
+inline const ScalarComplexMultipleReturnType
operator*(const std::complex<Scalar>& scalar) const
{
- return CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived>
- (*static_cast<const Derived*>(this), internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >(scalar));
+ return ScalarComplexMultipleReturnType(derived(), internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >(scalar));
}
EIGEN_DEVICE_FUNC
@@ -86,6 +88,9 @@ inline friend const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::compl
operator*(const std::complex<Scalar>& scalar, const StorageBaseType& matrix)
{ return matrix*scalar; }
+
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+
/** \returns an expression of *this with the \a Scalar type cast to
* \a NewScalar.
*
@@ -95,10 +100,10 @@ operator*(const std::complex<Scalar>& scalar, const StorageBaseType& matrix)
*/
template<typename NewType>
EIGEN_DEVICE_FUNC
-typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<typename internal::traits<Derived>::Scalar, NewType>, const Derived> >::type
+typename CastXpr<NewType>::Type
cast() const
{
- return derived();
+ return typename CastXpr<NewType>::Type(derived());
}
/** \returns an expression of the complex conjugate of \c *this.
@@ -116,14 +121,14 @@ conjugate() const
* \sa imag() */
EIGEN_DEVICE_FUNC
inline RealReturnType
-real() const { return derived(); }
+real() const { return RealReturnType(derived()); }
/** \returns a read-only expression of the imaginary part of \c *this.
*
* \sa real() */
EIGEN_DEVICE_FUNC
inline const ImagReturnType
-imag() const { return derived(); }
+imag() const { return ImagReturnType(derived()); }
/** \brief Apply a unary operator coefficient-wise
* \param[in] func Functor implementing the unary operator
@@ -176,11 +181,11 @@ unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
* \sa imag() */
EIGEN_DEVICE_FUNC
inline NonConstRealReturnType
-real() { return derived(); }
+real() { return NonConstRealReturnType(derived()); }
/** \returns a non const expression of the imaginary part of \c *this.
*
* \sa real() */
EIGEN_DEVICE_FUNC
inline NonConstImagReturnType
-imag() { return derived(); }
+imag() { return NonConstImagReturnType(derived()); }
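As above, the common unary ops keep their semantics; only the spelled-out return types move into typedefs (NegativeReturnType, ScalarQuotient1ReturnType, ScalarComplexMultipleReturnType, CastXpr<NewType>::Type). An illustrative sketch, again assuming Eigen 3:

#include <Eigen/Core>
#include <complex>
#include <iostream>

int main()
{
  Eigen::Matrix2f m;
  m << 1, 2, 3, 4;
  // operator-, operator/ and cast<>() evaluate exactly as before.
  Eigen::Matrix2d d = (-m / 2.0f).cast<double>();
  // Real matrix times complex scalar goes through scalar_multiple2_op
  // and yields a complex-valued expression.
  Eigen::Matrix2cf c = m * std::complex<float>(0.f, 1.f);
  std::cout << d << "\n" << c << std::endl;
  return 0;
}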
diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
index 1bb15f862..c99ee94ec 100644
--- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
@@ -10,6 +10,11 @@
// This file is a base class plugin containing matrix-specific coefficient-wise functions.
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
+typedef CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >, const Derived> CwiseScalarEqualReturnType;
/** \returns an expression of the coefficient-wise absolute value of \c *this
*
* Example: \include MatrixBase_cwiseAbs.cpp
@@ -18,8 +23,8 @@
* \sa cwiseAbs2()
*/
EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
-cwiseAbs() const { return derived(); }
+EIGEN_STRONG_INLINE const CwiseAbsReturnType
+cwiseAbs() const { return CwiseAbsReturnType(derived()); }
/** \returns an expression of the coefficient-wise squared absolute value of \c *this
*
@@ -29,8 +34,8 @@ cwiseAbs() const { return derived(); }
* \sa cwiseAbs()
*/
EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived>
-cwiseAbs2() const { return derived(); }
+EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
+cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
/** \returns an expression of the coefficient-wise square root of *this.
*
@@ -40,8 +45,8 @@ cwiseAbs2() const { return derived(); }
* \sa cwisePow(), cwiseSquare()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived>
-cwiseSqrt() const { return derived(); }
+inline const CwiseSqrtReturnType
+cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
/** \returns an expression of the coefficient-wise inverse of *this.
*
@@ -51,8 +56,8 @@ cwiseSqrt() const { return derived(); }
* \sa cwiseProduct()
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived>
-cwiseInverse() const { return derived(); }
+inline const CwiseInverseReturnType
+cwiseInverse() const { return CwiseInverseReturnType(derived()); }
/** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s
*
@@ -64,9 +69,8 @@ cwiseInverse() const { return derived(); }
* \sa cwiseEqual(const MatrixBase<OtherDerived> &) const
*/
EIGEN_DEVICE_FUNC
-inline const CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >, const Derived>
+inline const CwiseScalarEqualReturnType
cwiseEqual(const Scalar& s) const
{
- return CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >,const Derived>
- (derived(), std::bind1st(std::equal_to<Scalar>(), s));
+ return CwiseScalarEqualReturnType(derived(), std::bind1st(std::equal_to<Scalar>(), s));
}
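The matrix-specific wrappers follow the same pattern. Note that cwiseEqual(s) is still built on std::bind1st, so code of this vintage needs a pre-C++17 standard library. A short usage sketch (illustrative only):

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::Vector3d v(-1.0, 0.5, 2.0);
  Eigen::Vector3d a = v.cwiseAbs();      // CwiseAbsReturnType
  Eigen::Vector3d r = v.cwiseInverse();  // CwiseInverseReturnType
  // cwiseEqual(s) yields a boolean expression (CwiseScalarEqualReturnType).
  std::cout << a.transpose() << "\n"
            << r.transpose() << "\n"
            << v.cwiseEqual(0.5).transpose() << std::endl;  // 0 1 0
  return 0;
}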
diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index 47fe58135..1deabdae2 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -47,7 +47,7 @@ public :
{
#if defined(EIGEN_VECTORIZE_SSE)
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
- #elif defined(EIGEN_VECTORIZE_ALTIVEC)
+ #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
#else
if (SIZE==Dynamic) return "eigen2_novec"; else return "tiny_eigen2_novec";
diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt
index a9bc05137..d0efb4188 100644
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@@ -14,23 +14,18 @@ endif()
add_custom_target(blas)
-set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp)
-
-if(EIGEN_Fortran_COMPILER_WORKS)
-
-set(EigenBlas_SRCS ${EigenBlas_SRCS}
- complexdots.f
- srotm.f srotmg.f drotm.f drotmg.f
- lsame.f dspmv.f ssbmv.f
- chbmv.f sspmv.f
- zhbmv.f chpmv.f dsbmv.f
- zhpmv.f
- dtbmv.f stbmv.f ctbmv.f ztbmv.f
-)
+set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp
+ f2c/srotm.c f2c/srotmg.c f2c/drotm.c f2c/drotmg.c
+ f2c/lsame.c f2c/dspmv.c f2c/ssbmv.c f2c/chbmv.c
+ f2c/sspmv.c f2c/zhbmv.c f2c/chpmv.c f2c/dsbmv.c
+ f2c/zhpmv.c f2c/dtbmv.c f2c/stbmv.c f2c/ctbmv.c
+ f2c/ztbmv.c f2c/d_cnjg.c f2c/r_cnjg.c
+ )
+
+if (EIGEN_Fortran_COMPILER_WORKS)
+ set(EigenBlas_SRCS ${EigenBlas_SRCS} fortran/complexdots.f)
else()
-
-message(WARNING " No fortran compiler has been detected, the blas build will be incomplete.")
-
+ set(EigenBlas_SRCS ${EigenBlas_SRCS} f2c/complexdots.c)
endif()
add_library(eigen_blas_static ${EigenBlas_SRCS})
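With the f2c-translated sources, the reference routines become plain C symbols, so the BLAS library no longer requires a Fortran compiler except for complexdots.f. How the symbols are called is not shown in this patch; the declaration below is an assumption based on the usual f2c/Fortran convention (underscore-suffixed name, every argument passed by pointer), not a header shipped by Eigen:

// Hypothetical declaration for calling one of the f2c-converted
// routines directly; the integer -> int mapping is an assumption
// (f2c's "integer" typedef is platform-dependent).
extern "C" int drotm_(int* n, double* dx, int* incx,
                      double* dy, int* incy, double* dparam);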
diff --git a/blas/chbmv.f b/blas/chbmv.f
deleted file mode 100644
index 1b1c330ea..000000000
--- a/blas/chbmv.f
+++ /dev/null
@@ -1,310 +0,0 @@
- SUBROUTINE CHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- COMPLEX ALPHA,BETA
- INTEGER INCX,INCY,K,LDA,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- COMPLEX A(LDA,*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* CHBMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n hermitian band matrix, with k super-diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the band matrix A is being supplied as
-* follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* being supplied.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* being supplied.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry, K specifies the number of super-diagonals of the
-* matrix A. K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* ALPHA - COMPLEX .
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* A - COMPLEX array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the hermitian matrix, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer the upper
-* triangular part of a hermitian band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the hermitian matrix, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer the lower
-* triangular part of a hermitian band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Note that the imaginary parts of the diagonal elements need
-* not be set and are assumed to be zero.
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - COMPLEX array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the
-* vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - COMPLEX .
-* On entry, BETA specifies the scalar beta.
-* Unchanged on exit.
-*
-* Y - COMPLEX array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the
-* vector y. On exit, Y is overwritten by the updated vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- COMPLEX ONE
- PARAMETER (ONE= (1.0E+0,0.0E+0))
- COMPLEX ZERO
- PARAMETER (ZERO= (0.0E+0,0.0E+0))
-* ..
-* .. Local Scalars ..
- COMPLEX TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC CONJG,MAX,MIN,REAL
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (K.LT.0) THEN
- INFO = 3
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 6
- ELSE IF (INCX.EQ.0) THEN
- INFO = 8
- ELSE IF (INCY.EQ.0) THEN
- INFO = 11
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('CHBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array A
-* are accessed sequentially with one pass through A.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when upper triangle of A is stored.
-*
- KPLUS1 = K + 1
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- L = KPLUS1 - J
- DO 50 I = MAX(1,J-K),J - 1
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I)
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- L = KPLUS1 - J
- DO 70 I = MAX(1,J-K),J - 1
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- IF (J.GT.K) THEN
- KX = KX + INCX
- KY = KY + INCY
- END IF
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when lower triangle of A is stored.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*REAL(A(1,J))
- L = 1 - J
- DO 90 I = J + 1,MIN(N,J+K)
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I)
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*REAL(A(1,J))
- L = 1 - J
- IX = JX
- IY = JY
- DO 110 I = J + 1,MIN(N,J+K)
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of CHBMV .
-*
- END
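For reference, CHBMV computes y := alpha*A*x + beta*y with A hermitian and banded. Eigen has no packed band matrix type, so a dense selfadjointView gives only a functional stand-in; the helper below is hypothetical and ignores the O(n*k) band structure the Fortran exploits:

#include <Eigen/Core>
#include <complex>

// y := alpha*A*x + beta*y, A hermitian with its upper part stored.
// Dense stand-in for CHBMV: same result, no band exploitation.
void hbmv_like(std::complex<float> alpha,
               const Eigen::MatrixXcf& A,
               const Eigen::VectorXcf& x,
               std::complex<float> beta,
               Eigen::VectorXcf& y)
{
  y = beta * y + alpha * (A.selfadjointView<Eigen::Upper>() * x);
}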
diff --git a/blas/chpmv.f b/blas/chpmv.f
deleted file mode 100644
index 158be5a7b..000000000
--- a/blas/chpmv.f
+++ /dev/null
@@ -1,272 +0,0 @@
- SUBROUTINE CHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- COMPLEX ALPHA,BETA
- INTEGER INCX,INCY,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- COMPLEX AP(*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* CHPMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n hermitian matrix, supplied in packed form.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the matrix A is supplied in the packed
-* array AP as follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* supplied in AP.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* supplied in AP.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* ALPHA - COMPLEX .
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* AP - COMPLEX array of DIMENSION at least
-* ( ( n*( n + 1 ) )/2 ).
-* Before entry with UPLO = 'U' or 'u', the array AP must
-* contain the upper triangular part of the hermitian matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-* and a( 2, 2 ) respectively, and so on.
-* Before entry with UPLO = 'L' or 'l', the array AP must
-* contain the lower triangular part of the hermitian matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-* and a( 3, 1 ) respectively, and so on.
-* Note that the imaginary parts of the diagonal elements need
-* not be set and are assumed to be zero.
-* Unchanged on exit.
-*
-* X - COMPLEX array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - COMPLEX .
-* On entry, BETA specifies the scalar beta. When BETA is
-* supplied as zero then Y need not be set on input.
-* Unchanged on exit.
-*
-* Y - COMPLEX array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the n
-* element vector y. On exit, Y is overwritten by the updated
-* vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- COMPLEX ONE
- PARAMETER (ONE= (1.0E+0,0.0E+0))
- COMPLEX ZERO
- PARAMETER (ZERO= (0.0E+0,0.0E+0))
-* ..
-* .. Local Scalars ..
- COMPLEX TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC CONJG,REAL
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (INCX.EQ.0) THEN
- INFO = 6
- ELSE IF (INCY.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('CHPMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array AP
-* are accessed sequentially with one pass through AP.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- KK = 1
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when AP contains the upper triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- K = KK
- DO 50 I = 1,J - 1
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + CONJG(AP(K))*X(I)
- K = K + 1
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2
- KK = KK + J
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- DO 70 K = KK,KK + J - 2
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + CONJG(AP(K))*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + J
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when AP contains the lower triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*REAL(AP(KK))
- K = KK + 1
- DO 90 I = J + 1,N
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + CONJG(AP(K))*X(I)
- K = K + 1
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- KK = KK + (N-J+1)
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*REAL(AP(KK))
- IX = JX
- IY = JY
- DO 110 K = KK + 1,KK + N - J
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + CONJG(AP(K))*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + (N-J+1)
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of CHPMV .
-*
- END
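CHPMV's packed layout stores the upper triangle column by column, i.e. (0-based) a(i,j) with i <= j lives at AP[i + j*(j+1)/2]. A small unpacking sketch with a hypothetical helper:

#include <Eigen/Core>
#include <complex>

// Unpack column-major upper-triangular packed storage into a full
// hermitian matrix. Per the BLAS contract the imaginary parts of
// the diagonal are assumed zero; they are copied as-is here.
Eigen::MatrixXcf unpack_upper(const std::complex<float>* ap, int n)
{
  Eigen::MatrixXcf A(n, n);
  for (int j = 0; j < n; ++j)
    for (int i = 0; i <= j; ++i) {
      A(i, j) = ap[i + j * (j + 1) / 2];
      if (i != j)
        A(j, i) = std::conj(A(i, j));  // hermitian mirror
    }
  return A;
}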
diff --git a/blas/ctbmv.f b/blas/ctbmv.f
deleted file mode 100644
index 5a879fa01..000000000
--- a/blas/ctbmv.f
+++ /dev/null
@@ -1,366 +0,0 @@
- SUBROUTINE CTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-* .. Scalar Arguments ..
- INTEGER INCX,K,LDA,N
- CHARACTER DIAG,TRANS,UPLO
-* ..
-* .. Array Arguments ..
- COMPLEX A(LDA,*),X(*)
-* ..
-*
-* Purpose
-* =======
-*
-* CTBMV performs one of the matrix-vector operations
-*
-* x := A*x, or x := A'*x, or x := conjg( A' )*x,
-*
-* where x is an n element vector and A is an n by n unit, or non-unit,
-* upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the matrix is an upper or
-* lower triangular matrix as follows:
-*
-* UPLO = 'U' or 'u' A is an upper triangular matrix.
-*
-* UPLO = 'L' or 'l' A is a lower triangular matrix.
-*
-* Unchanged on exit.
-*
-* TRANS - CHARACTER*1.
-* On entry, TRANS specifies the operation to be performed as
-* follows:
-*
-* TRANS = 'N' or 'n' x := A*x.
-*
-* TRANS = 'T' or 't' x := A'*x.
-*
-* TRANS = 'C' or 'c' x := conjg( A' )*x.
-*
-* Unchanged on exit.
-*
-* DIAG - CHARACTER*1.
-* On entry, DIAG specifies whether or not A is unit
-* triangular as follows:
-*
-* DIAG = 'U' or 'u' A is assumed to be unit triangular.
-*
-* DIAG = 'N' or 'n' A is not assumed to be unit
-* triangular.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry with UPLO = 'U' or 'u', K specifies the number of
-* super-diagonals of the matrix A.
-* On entry with UPLO = 'L' or 'l', K specifies the number of
-* sub-diagonals of the matrix A.
-* K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* A - COMPLEX array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer an upper
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer a lower
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Note that when DIAG = 'U' or 'u' the elements of the array A
-* corresponding to the diagonal elements of the matrix are not
-* referenced, but are assumed to be unity.
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - COMPLEX array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x. On exit, X is overwritten with the
-* tranformed vector x.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- COMPLEX ZERO
- PARAMETER (ZERO= (0.0E+0,0.0E+0))
-* ..
-* .. Local Scalars ..
- COMPLEX TEMP
- INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
- LOGICAL NOCONJ,NOUNIT
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC CONJG,MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
- + .NOT.LSAME(TRANS,'C')) THEN
- INFO = 2
- ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
- INFO = 3
- ELSE IF (N.LT.0) THEN
- INFO = 4
- ELSE IF (K.LT.0) THEN
- INFO = 5
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 7
- ELSE IF (INCX.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('CTBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF (N.EQ.0) RETURN
-*
- NOCONJ = LSAME(TRANS,'T')
- NOUNIT = LSAME(DIAG,'N')
-*
-* Set up the start point in X if the increment is not unity. This
-* will be ( N - 1 )*INCX too small for descending loops.
-*
- IF (INCX.LE.0) THEN
- KX = 1 - (N-1)*INCX
- ELSE IF (INCX.NE.1) THEN
- KX = 1
- END IF
-*
-* Start the operations. In this version the elements of A are
-* accessed sequentially with one pass through A.
-*
- IF (LSAME(TRANS,'N')) THEN
-*
-* Form x := A*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 20 J = 1,N
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = KPLUS1 - J
- DO 10 I = MAX(1,J-K),J - 1
- X(I) = X(I) + TEMP*A(L+I,J)
- 10 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
- END IF
- 20 CONTINUE
- ELSE
- JX = KX
- DO 40 J = 1,N
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = KPLUS1 - J
- DO 30 I = MAX(1,J-K),J - 1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX + INCX
- 30 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
- END IF
- JX = JX + INCX
- IF (J.GT.K) KX = KX + INCX
- 40 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 60 J = N,1,-1
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = 1 - J
- DO 50 I = MIN(N,J+K),J + 1,-1
- X(I) = X(I) + TEMP*A(L+I,J)
- 50 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(1,J)
- END IF
- 60 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 80 J = N,1,-1
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = 1 - J
- DO 70 I = MIN(N,J+K),J + 1,-1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX - INCX
- 70 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(1,J)
- END IF
- JX = JX - INCX
- IF ((N-J).GE.K) KX = KX - INCX
- 80 CONTINUE
- END IF
- END IF
- ELSE
-*
-* Form x := A'*x or x := conjg( A' )*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 110 J = N,1,-1
- TEMP = X(J)
- L = KPLUS1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 90 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(I)
- 90 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J))
- DO 100 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + CONJG(A(L+I,J))*X(I)
- 100 CONTINUE
- END IF
- X(J) = TEMP
- 110 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 140 J = N,1,-1
- TEMP = X(JX)
- KX = KX - INCX
- IX = KX
- L = KPLUS1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 120 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX - INCX
- 120 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J))
- DO 130 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + CONJG(A(L+I,J))*X(IX)
- IX = IX - INCX
- 130 CONTINUE
- END IF
- X(JX) = TEMP
- JX = JX - INCX
- 140 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 170 J = 1,N
- TEMP = X(J)
- L = 1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 150 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(I)
- 150 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J))
- DO 160 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + CONJG(A(L+I,J))*X(I)
- 160 CONTINUE
- END IF
- X(J) = TEMP
- 170 CONTINUE
- ELSE
- JX = KX
- DO 200 J = 1,N
- TEMP = X(JX)
- KX = KX + INCX
- IX = KX
- L = 1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 180 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX + INCX
- 180 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J))
- DO 190 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + CONJG(A(L+I,J))*X(IX)
- IX = IX + INCX
- 190 CONTINUE
- END IF
- X(JX) = TEMP
- JX = JX + INCX
- 200 CONTINUE
- END IF
- END IF
- END IF
-*
- RETURN
-*
-* End of CTBMV .
-*
- END
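CTBMV overwrites x with A*x (or A'*x, conjg(A')*x) for a triangular band matrix. The dense Eigen counterpart of the TRANS = 'N' case, again a hypothetical helper without band exploitation:

#include <Eigen/Core>
#include <complex>

// x := A*x with A upper triangular (dense stand-in for CTBMV with
// TRANS = 'N'); use A.adjoint() for the conjugate-transposed case.
void tbmv_like(const Eigen::MatrixXcf& A, Eigen::VectorXcf& x)
{
  Eigen::VectorXcf tmp = A.triangularView<Eigen::Upper>() * x;
  x = tmp;
}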
diff --git a/blas/drotm.f b/blas/drotm.f
deleted file mode 100644
index 63a3b1134..000000000
--- a/blas/drotm.f
+++ /dev/null
@@ -1,147 +0,0 @@
- SUBROUTINE DROTM(N,DX,INCX,DY,INCY,DPARAM)
-* .. Scalar Arguments ..
- INTEGER INCX,INCY,N
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION DPARAM(5),DX(*),DY(*)
-* ..
-*
-* Purpose
-* =======
-*
-* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX
-*
-* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN
-* (DY**T)
-*
-* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
-* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY.
-* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*
-* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0
-*
-* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0)
-* H=( ) ( ) ( ) ( )
-* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0).
-* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM.
-*
-* Arguments
-* =========
-*
-* N (input) INTEGER
-* number of elements in input vector(s)
-*
-* DX (input/output) DOUBLE PRECISION array, dimension N
-* double precision vector with N elements
-*
-* INCX (input) INTEGER
-* storage spacing between elements of DX
-*
-* DY (input/output) DOUBLE PRECISION array, dimension N
-* double precision vector with N elements
-*
-* INCY (input) INTEGER
-* storage spacing between elements of DY
-*
-* DPARAM (input/output) DOUBLE PRECISION array, dimension 5
-* DPARAM(1)=DFLAG
-* DPARAM(2)=DH11
-* DPARAM(3)=DH21
-* DPARAM(4)=DH12
-* DPARAM(5)=DH22
-*
-* =====================================================================
-*
-* .. Local Scalars ..
- DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,TWO,W,Z,ZERO
- INTEGER I,KX,KY,NSTEPS
-* ..
-* .. Data statements ..
- DATA ZERO,TWO/0.D0,2.D0/
-* ..
-*
- DFLAG = DPARAM(1)
- IF (N.LE.0 .OR. (DFLAG+TWO.EQ.ZERO)) GO TO 140
- IF (.NOT. (INCX.EQ.INCY.AND.INCX.GT.0)) GO TO 70
-*
- NSTEPS = N*INCX
- IF (DFLAG) 50,10,30
- 10 CONTINUE
- DH12 = DPARAM(4)
- DH21 = DPARAM(3)
- DO 20 I = 1,NSTEPS,INCX
- W = DX(I)
- Z = DY(I)
- DX(I) = W + Z*DH12
- DY(I) = W*DH21 + Z
- 20 CONTINUE
- GO TO 140
- 30 CONTINUE
- DH11 = DPARAM(2)
- DH22 = DPARAM(5)
- DO 40 I = 1,NSTEPS,INCX
- W = DX(I)
- Z = DY(I)
- DX(I) = W*DH11 + Z
- DY(I) = -W + DH22*Z
- 40 CONTINUE
- GO TO 140
- 50 CONTINUE
- DH11 = DPARAM(2)
- DH12 = DPARAM(4)
- DH21 = DPARAM(3)
- DH22 = DPARAM(5)
- DO 60 I = 1,NSTEPS,INCX
- W = DX(I)
- Z = DY(I)
- DX(I) = W*DH11 + Z*DH12
- DY(I) = W*DH21 + Z*DH22
- 60 CONTINUE
- GO TO 140
- 70 CONTINUE
- KX = 1
- KY = 1
- IF (INCX.LT.0) KX = 1 + (1-N)*INCX
- IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*
- IF (DFLAG) 120,80,100
- 80 CONTINUE
- DH12 = DPARAM(4)
- DH21 = DPARAM(3)
- DO 90 I = 1,N
- W = DX(KX)
- Z = DY(KY)
- DX(KX) = W + Z*DH12
- DY(KY) = W*DH21 + Z
- KX = KX + INCX
- KY = KY + INCY
- 90 CONTINUE
- GO TO 140
- 100 CONTINUE
- DH11 = DPARAM(2)
- DH22 = DPARAM(5)
- DO 110 I = 1,N
- W = DX(KX)
- Z = DY(KY)
- DX(KX) = W*DH11 + Z
- DY(KY) = -W + DH22*Z
- KX = KX + INCX
- KY = KY + INCY
- 110 CONTINUE
- GO TO 140
- 120 CONTINUE
- DH11 = DPARAM(2)
- DH12 = DPARAM(4)
- DH21 = DPARAM(3)
- DH22 = DPARAM(5)
- DO 130 I = 1,N
- W = DX(KX)
- Z = DY(KY)
- DX(KX) = W*DH11 + Z*DH12
- DY(KY) = W*DH21 + Z*DH22
- KX = KX + INCX
- KY = KY + INCY
- 130 CONTINUE
- 140 CONTINUE
- RETURN
- END
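DROTM applies the 2-by-2 matrix H whose ±1/0 entries are implied by DPARAM(1); expanding the four DFLAG cases for a single (x, y) pair makes the encoding explicit. A hypothetical helper mirroring the Fortran above:

// Apply the modified Givens transform encoded in dparam[0..4]
// (dflag, h11, h21, h12, h22) to one (x, y) pair.
void apply_rotm(const double dparam[5], double& x, double& y)
{
  const double flag = dparam[0];
  const double w = x, z = y;
  if (flag == -2.0) {            // H = identity: nothing to do
    return;
  } else if (flag == -1.0) {     // full H
    x = dparam[1] * w + dparam[3] * z;
    y = dparam[2] * w + dparam[4] * z;
  } else if (flag == 0.0) {      // unit diagonal
    x = w + dparam[3] * z;
    y = dparam[2] * w + z;
  } else {                       // flag == 1: unit anti-diagonal
    x = dparam[1] * w + z;
    y = -w + dparam[4] * z;
  }
}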
diff --git a/blas/drotmg.f b/blas/drotmg.f
deleted file mode 100644
index 3ae647b08..000000000
--- a/blas/drotmg.f
+++ /dev/null
@@ -1,206 +0,0 @@
- SUBROUTINE DROTMG(DD1,DD2,DX1,DY1,DPARAM)
-* .. Scalar Arguments ..
- DOUBLE PRECISION DD1,DD2,DX1,DY1
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION DPARAM(5)
-* ..
-*
-* Purpose
-* =======
-*
-* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS
-* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)*
-* DY2)**T.
-* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*
-* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0
-*
-* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0)
-* H=( ) ( ) ( ) ( )
-* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0).
-* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22
-* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE
-* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.)
-*
-* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE
-* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE
-* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM.
-*
-*
-* Arguments
-* =========
-*
-* DD1 (input/output) DOUBLE PRECISION
-*
-* DD2 (input/output) DOUBLE PRECISION
-*
-* DX1 (input/output) DOUBLE PRECISION
-*
-* DY1 (input) DOUBLE PRECISION
-*
-* DPARAM (input/output) DOUBLE PRECISION array, dimension 5
-* DPARAM(1)=DFLAG
-* DPARAM(2)=DH11
-* DPARAM(3)=DH21
-* DPARAM(4)=DH12
-* DPARAM(5)=DH22
-*
-* =====================================================================
-*
-* .. Local Scalars ..
- DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,DP1,DP2,DQ1,DQ2,DTEMP,
- + DU,GAM,GAMSQ,ONE,RGAMSQ,TWO,ZERO
- INTEGER IGO
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC DABS
-* ..
-* .. Data statements ..
-*
- DATA ZERO,ONE,TWO/0.D0,1.D0,2.D0/
- DATA GAM,GAMSQ,RGAMSQ/4096.D0,16777216.D0,5.9604645D-8/
-* ..
-
- IF (.NOT.DD1.LT.ZERO) GO TO 10
-* GO ZERO-H-D-AND-DX1..
- GO TO 60
- 10 CONTINUE
-* CASE-DD1-NONNEGATIVE
- DP2 = DD2*DY1
- IF (.NOT.DP2.EQ.ZERO) GO TO 20
- DFLAG = -TWO
- GO TO 260
-* REGULAR-CASE..
- 20 CONTINUE
- DP1 = DD1*DX1
- DQ2 = DP2*DY1
- DQ1 = DP1*DX1
-*
- IF (.NOT.DABS(DQ1).GT.DABS(DQ2)) GO TO 40
- DH21 = -DY1/DX1
- DH12 = DP2/DP1
-*
- DU = ONE - DH12*DH21
-*
- IF (.NOT.DU.LE.ZERO) GO TO 30
-* GO ZERO-H-D-AND-DX1..
- GO TO 60
- 30 CONTINUE
- DFLAG = ZERO
- DD1 = DD1/DU
- DD2 = DD2/DU
- DX1 = DX1*DU
-* GO SCALE-CHECK..
- GO TO 100
- 40 CONTINUE
- IF (.NOT.DQ2.LT.ZERO) GO TO 50
-* GO ZERO-H-D-AND-DX1..
- GO TO 60
- 50 CONTINUE
- DFLAG = ONE
- DH11 = DP1/DP2
- DH22 = DX1/DY1
- DU = ONE + DH11*DH22
- DTEMP = DD2/DU
- DD2 = DD1/DU
- DD1 = DTEMP
- DX1 = DY1*DU
-* GO SCALE-CHECK
- GO TO 100
-* PROCEDURE..ZERO-H-D-AND-DX1..
- 60 CONTINUE
- DFLAG = -ONE
- DH11 = ZERO
- DH12 = ZERO
- DH21 = ZERO
- DH22 = ZERO
-*
- DD1 = ZERO
- DD2 = ZERO
- DX1 = ZERO
-* RETURN..
- GO TO 220
-* PROCEDURE..FIX-H..
- 70 CONTINUE
- IF (.NOT.DFLAG.GE.ZERO) GO TO 90
-*
- IF (.NOT.DFLAG.EQ.ZERO) GO TO 80
- DH11 = ONE
- DH22 = ONE
- DFLAG = -ONE
- GO TO 90
- 80 CONTINUE
- DH21 = -ONE
- DH12 = ONE
- DFLAG = -ONE
- 90 CONTINUE
- GO TO IGO(120,150,180,210)
-* PROCEDURE..SCALE-CHECK
- 100 CONTINUE
- 110 CONTINUE
- IF (.NOT.DD1.LE.RGAMSQ) GO TO 130
- IF (DD1.EQ.ZERO) GO TO 160
- ASSIGN 120 TO IGO
-* FIX-H..
- GO TO 70
- 120 CONTINUE
- DD1 = DD1*GAM**2
- DX1 = DX1/GAM
- DH11 = DH11/GAM
- DH12 = DH12/GAM
- GO TO 110
- 130 CONTINUE
- 140 CONTINUE
- IF (.NOT.DD1.GE.GAMSQ) GO TO 160
- ASSIGN 150 TO IGO
-* FIX-H..
- GO TO 70
- 150 CONTINUE
- DD1 = DD1/GAM**2
- DX1 = DX1*GAM
- DH11 = DH11*GAM
- DH12 = DH12*GAM
- GO TO 140
- 160 CONTINUE
- 170 CONTINUE
- IF (.NOT.DABS(DD2).LE.RGAMSQ) GO TO 190
- IF (DD2.EQ.ZERO) GO TO 220
- ASSIGN 180 TO IGO
-* FIX-H..
- GO TO 70
- 180 CONTINUE
- DD2 = DD2*GAM**2
- DH21 = DH21/GAM
- DH22 = DH22/GAM
- GO TO 170
- 190 CONTINUE
- 200 CONTINUE
- IF (.NOT.DABS(DD2).GE.GAMSQ) GO TO 220
- ASSIGN 210 TO IGO
-* FIX-H..
- GO TO 70
- 210 CONTINUE
- DD2 = DD2/GAM**2
- DH21 = DH21*GAM
- DH22 = DH22*GAM
- GO TO 200
- 220 CONTINUE
- IF (DFLAG) 250,230,240
- 230 CONTINUE
- DPARAM(3) = DH21
- DPARAM(4) = DH12
- GO TO 260
- 240 CONTINUE
- DPARAM(2) = DH11
- DPARAM(5) = DH22
- GO TO 260
- 250 CONTINUE
- DPARAM(2) = DH11
- DPARAM(3) = DH21
- DPARAM(4) = DH12
- DPARAM(5) = DH22
- 260 CONTINUE
- DPARAM(1) = DFLAG
- RETURN
- END
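DROTMG emits H in the same compressed DPARAM encoding. Expanding it into the explicit 2-by-2 matrix from the comment block, as a hypothetical helper:

#include <Eigen/Core>

// Expand the DPARAM encoding produced by DROTMG into the full 2x2
// matrix H shown in the routine's comment block.
Eigen::Matrix2d expand_rotmg(const double dparam[5])
{
  const double flag = dparam[0];
  Eigen::Matrix2d h;
  if (flag == -2.0)
    h << 1.0, 0.0, 0.0, 1.0;
  else if (flag == -1.0)
    h << dparam[1], dparam[3], dparam[2], dparam[4];
  else if (flag == 0.0)
    h << 1.0, dparam[3], dparam[2], 1.0;
  else  // flag == 1
    h << dparam[1], 1.0, -1.0, dparam[4];
  return h;
}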
diff --git a/blas/dsbmv.f b/blas/dsbmv.f
deleted file mode 100644
index 8c82d1fa1..000000000
--- a/blas/dsbmv.f
+++ /dev/null
@@ -1,304 +0,0 @@
- SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- DOUBLE PRECISION ALPHA,BETA
- INTEGER INCX,INCY,K,LDA,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* DSBMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n symmetric band matrix, with k super-diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the band matrix A is being supplied as
-* follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* being supplied.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* being supplied.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry, K specifies the number of super-diagonals of the
-* matrix A. K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* ALPHA - DOUBLE PRECISION.
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the symmetric matrix, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer the upper
-* triangular part of a symmetric band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the symmetric matrix, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer the lower
-* triangular part of a symmetric band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - DOUBLE PRECISION array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the
-* vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - DOUBLE PRECISION.
-* On entry, BETA specifies the scalar beta.
-* Unchanged on exit.
-*
-* Y - DOUBLE PRECISION array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the
-* vector y. On exit, Y is overwritten by the updated vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE PRECISION ONE,ZERO
- PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-* ..
-* .. Local Scalars ..
- DOUBLE PRECISION TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (K.LT.0) THEN
- INFO = 3
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 6
- ELSE IF (INCX.EQ.0) THEN
- INFO = 8
- ELSE IF (INCY.EQ.0) THEN
- INFO = 11
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('DSBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array A
-* are accessed sequentially with one pass through A.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when upper triangle of A is stored.
-*
- KPLUS1 = K + 1
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- L = KPLUS1 - J
- DO 50 I = MAX(1,J-K),J - 1
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(I)
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- L = KPLUS1 - J
- DO 70 I = MAX(1,J-K),J - 1
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- IF (J.GT.K) THEN
- KX = KX + INCX
- KY = KY + INCY
- END IF
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when lower triangle of A is stored.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*A(1,J)
- L = 1 - J
- DO 90 I = J + 1,MIN(N,J+K)
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(I)
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*A(1,J)
- L = 1 - J
- IX = JX
- IY = JY
- DO 110 I = J + 1,MIN(N,J+K)
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of DSBMV .
-*
- END
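The comment block above already gives the Fortran loop that packs the upper triangle into DSBMV's band storage; in 0-based indexing the mapping reads full (i, j) -> band (k + i - j, j) for max(0, j - k) <= i <= j. The same mapping in C++, as a hypothetical helper:

#include <Eigen/Core>
#include <algorithm>

// Pack the upper triangle of a symmetric matrix into the (k+1) x n
// band layout DSBMV expects (k super-diagonals plus the diagonal).
Eigen::MatrixXd to_band_upper(const Eigen::MatrixXd& full, int k)
{
  const int n = static_cast<int>(full.cols());
  Eigen::MatrixXd band = Eigen::MatrixXd::Zero(k + 1, n);
  for (int j = 0; j < n; ++j)
    for (int i = std::max(0, j - k); i <= j; ++i)
      band(k + i - j, j) = full(i, j);
  return band;
}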
diff --git a/blas/dspmv.f b/blas/dspmv.f
deleted file mode 100644
index f6e121e76..000000000
--- a/blas/dspmv.f
+++ /dev/null
@@ -1,265 +0,0 @@
- SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- DOUBLE PRECISION ALPHA,BETA
- INTEGER INCX,INCY,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION AP(*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* DSPMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n symmetric matrix, supplied in packed form.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the matrix A is supplied in the packed
-* array AP as follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* supplied in AP.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* supplied in AP.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* ALPHA - DOUBLE PRECISION.
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* AP - DOUBLE PRECISION array of DIMENSION at least
-* ( ( n*( n + 1 ) )/2 ).
-* Before entry with UPLO = 'U' or 'u', the array AP must
-* contain the upper triangular part of the symmetric matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-* and a( 2, 2 ) respectively, and so on.
-* Before entry with UPLO = 'L' or 'l', the array AP must
-* contain the lower triangular part of the symmetric matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-* and a( 3, 1 ) respectively, and so on.
-* Unchanged on exit.
-*
-* X - DOUBLE PRECISION array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - DOUBLE PRECISION.
-* On entry, BETA specifies the scalar beta. When BETA is
-* supplied as zero then Y need not be set on input.
-* Unchanged on exit.
-*
-* Y - DOUBLE PRECISION array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the n
-* element vector y. On exit, Y is overwritten by the updated
-* vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE PRECISION ONE,ZERO
- PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-* ..
-* .. Local Scalars ..
- DOUBLE PRECISION TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (INCX.EQ.0) THEN
- INFO = 6
- ELSE IF (INCY.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('DSPMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array AP
-* are accessed sequentially with one pass through AP.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- KK = 1
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when AP contains the upper triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- K = KK
- DO 50 I = 1,J - 1
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(I)
- K = K + 1
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
- KK = KK + J
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- DO 70 K = KK,KK + J - 2
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + J
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when AP contains the lower triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*AP(KK)
- K = KK + 1
- DO 90 I = J + 1,N
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(I)
- K = K + 1
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- KK = KK + (N-J+1)
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*AP(KK)
- IX = JX
- IY = JY
- DO 110 K = KK + 1,KK + N - J
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + (N-J+1)
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of DSPMV .
-*
- END
diff --git a/blas/dtbmv.f b/blas/dtbmv.f
deleted file mode 100644
index a87ffdeae..000000000
--- a/blas/dtbmv.f
+++ /dev/null
@@ -1,335 +0,0 @@
- SUBROUTINE DTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-* .. Scalar Arguments ..
- INTEGER INCX,K,LDA,N
- CHARACTER DIAG,TRANS,UPLO
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION A(LDA,*),X(*)
-* ..
-*
-* Purpose
-* =======
-*
-* DTBMV performs one of the matrix-vector operations
-*
-* x := A*x, or x := A'*x,
-*
-* where x is an n element vector and A is an n by n unit, or non-unit,
-* upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the matrix is an upper or
-* lower triangular matrix as follows:
-*
-* UPLO = 'U' or 'u' A is an upper triangular matrix.
-*
-* UPLO = 'L' or 'l' A is a lower triangular matrix.
-*
-* Unchanged on exit.
-*
-* TRANS - CHARACTER*1.
-* On entry, TRANS specifies the operation to be performed as
-* follows:
-*
-* TRANS = 'N' or 'n' x := A*x.
-*
-* TRANS = 'T' or 't' x := A'*x.
-*
-* TRANS = 'C' or 'c' x := A'*x.
-*
-* Unchanged on exit.
-*
-* DIAG - CHARACTER*1.
-* On entry, DIAG specifies whether or not A is unit
-* triangular as follows:
-*
-* DIAG = 'U' or 'u' A is assumed to be unit triangular.
-*
-* DIAG = 'N' or 'n' A is not assumed to be unit
-* triangular.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry with UPLO = 'U' or 'u', K specifies the number of
-* super-diagonals of the matrix A.
-* On entry with UPLO = 'L' or 'l', K specifies the number of
-* sub-diagonals of the matrix A.
-* K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer an upper
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer a lower
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Note that when DIAG = 'U' or 'u' the elements of the array A
-* corresponding to the diagonal elements of the matrix are not
-* referenced, but are assumed to be unity.
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - DOUBLE PRECISION array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x. On exit, X is overwritten with the
-*           transformed vector x.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE PRECISION ZERO
- PARAMETER (ZERO=0.0D+0)
-* ..
-* .. Local Scalars ..
- DOUBLE PRECISION TEMP
- INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
- LOGICAL NOUNIT
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
- + .NOT.LSAME(TRANS,'C')) THEN
- INFO = 2
- ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
- INFO = 3
- ELSE IF (N.LT.0) THEN
- INFO = 4
- ELSE IF (K.LT.0) THEN
- INFO = 5
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 7
- ELSE IF (INCX.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('DTBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF (N.EQ.0) RETURN
-*
- NOUNIT = LSAME(DIAG,'N')
-*
-* Set up the start point in X if the increment is not unity. This
-* will be ( N - 1 )*INCX too small for descending loops.
-*
- IF (INCX.LE.0) THEN
- KX = 1 - (N-1)*INCX
- ELSE IF (INCX.NE.1) THEN
- KX = 1
- END IF
-*
-* Start the operations. In this version the elements of A are
-* accessed sequentially with one pass through A.
-*
- IF (LSAME(TRANS,'N')) THEN
-*
-* Form x := A*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 20 J = 1,N
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = KPLUS1 - J
- DO 10 I = MAX(1,J-K),J - 1
- X(I) = X(I) + TEMP*A(L+I,J)
- 10 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
- END IF
- 20 CONTINUE
- ELSE
- JX = KX
- DO 40 J = 1,N
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = KPLUS1 - J
- DO 30 I = MAX(1,J-K),J - 1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX + INCX
- 30 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
- END IF
- JX = JX + INCX
- IF (J.GT.K) KX = KX + INCX
- 40 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 60 J = N,1,-1
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = 1 - J
- DO 50 I = MIN(N,J+K),J + 1,-1
- X(I) = X(I) + TEMP*A(L+I,J)
- 50 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(1,J)
- END IF
- 60 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 80 J = N,1,-1
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = 1 - J
- DO 70 I = MIN(N,J+K),J + 1,-1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX - INCX
- 70 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(1,J)
- END IF
- JX = JX - INCX
- IF ((N-J).GE.K) KX = KX - INCX
- 80 CONTINUE
- END IF
- END IF
- ELSE
-*
-* Form x := A'*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 100 J = N,1,-1
- TEMP = X(J)
- L = KPLUS1 - J
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 90 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(I)
- 90 CONTINUE
- X(J) = TEMP
- 100 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 120 J = N,1,-1
- TEMP = X(JX)
- KX = KX - INCX
- IX = KX
- L = KPLUS1 - J
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 110 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX - INCX
- 110 CONTINUE
- X(JX) = TEMP
- JX = JX - INCX
- 120 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 140 J = 1,N
- TEMP = X(J)
- L = 1 - J
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 130 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(I)
- 130 CONTINUE
- X(J) = TEMP
- 140 CONTINUE
- ELSE
- JX = KX
- DO 160 J = 1,N
- TEMP = X(JX)
- KX = KX + INCX
- IX = KX
- L = 1 - J
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 150 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX + INCX
- 150 CONTINUE
- X(JX) = TEMP
- JX = JX + INCX
- 160 CONTINUE
- END IF
- END IF
- END IF
-*
- RETURN
-*
-* End of DTBMV .
-*
- END
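
[Editor's note] The band-storage layout documented in the deleted DTBMV header above (and reused verbatim by the f2c translations added below) admits a direct C transcription. The sketch packs the upper triangular band with k super-diagonals exactly as the documented DO 20/DO 10 fragment does; the function name, the dense column-major input `full`, and the buffers are our assumptions, not part of the BLAS.

    /* Pack the upper triangular band (k super-diagonals) of a column-major
       n-by-n matrix into band storage with leading dimension lda >= k + 1,
       mirroring the Fortran fragment in the header above. */
    void pack_upper_band(int n, int k, const double *full, double *a, int lda)
    {
        for (int j = 1; j <= n; ++j) {
            int m  = k + 1 - j;                  /* row offset for column j */
            int i0 = (j - k > 1) ? (j - k) : 1;  /* MAX( 1, J - K ) */
            for (int i = i0; i <= j; ++i)        /* A( M + I, J ) = matrix( I, J ) */
                a[(m + i - 1) + (j - 1) * lda] = full[(i - 1) + (j - 1) * n];
        }
    }
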
diff --git a/blas/f2c/chbmv.c b/blas/f2c/chbmv.c
new file mode 100644
index 000000000..f218fe3f5
--- /dev/null
+++ b/blas/f2c/chbmv.c
@@ -0,0 +1,487 @@
+/* chbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+   on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int chbmv_(char *uplo, integer *n, integer *k, complex *
+ alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
+ beta, complex *y, integer *incy, ftnlen uplo_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+ real r__1;
+ complex q__1, q__2, q__3, q__4;
+
+ /* Builtin functions */
+ void r_cnjg(complex *, complex *);
+
+ /* Local variables */
+ integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+ complex temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* CHBMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n hermitian band matrix, with k super-diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the band matrix A is being supplied as */
+/* follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* being supplied. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* being supplied. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry, K specifies the number of super-diagonals of the */
+/* matrix A. K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* ALPHA - COMPLEX . */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* A - COMPLEX array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the hermitian matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer the upper */
+/* triangular part of a hermitian band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the hermitian matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer the lower */
+/* triangular part of a hermitian band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Note that the imaginary parts of the diagonal elements need */
+/* not be set and are assumed to be zero. */
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - COMPLEX array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the */
+/* vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - COMPLEX . */
+/* On entry, BETA specifies the scalar beta. */
+/* Unchanged on exit. */
+
+/* Y - COMPLEX array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the */
+/* vector y. On exit, Y is overwritten by the updated vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+ --y;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*k < 0) {
+ info = 3;
+ } else if (*lda < *k + 1) {
+ info = 6;
+ } else if (*incx == 0) {
+ info = 8;
+ } else if (*incy == 0) {
+ info = 11;
+ }
+ if (info != 0) {
+ xerbla_("CHBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f &&
+ beta->i == 0.f))) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array A */
+/* are accessed sequentially with one pass through A. */
+
+/* First form y := beta*y. */
+
+ if (beta->r != 1.f || beta->i != 0.f) {
+ if (*incy == 1) {
+ if (beta->r == 0.f && beta->i == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ y[i__2].r = 0.f, y[i__2].i = 0.f;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (beta->r == 0.f && beta->i == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ y[i__2].r = 0.f, y[i__2].i = 0.f;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ i__3 = iy;
+ q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (alpha->r == 0.f && alpha->i == 0.f) {
+ return 0;
+ }
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when upper triangle of A is stored. */
+
+ kplus1 = *k + 1;
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ i__5 = l + i__ + j * a_dim1;
+ q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__2 = i__;
+ q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i =
+ q__3.r * x[i__2].i + q__3.i * x[i__2].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+/* L50: */
+ }
+ i__4 = j;
+ i__2 = j;
+ i__3 = kplus1 + j * a_dim1;
+ r__1 = a[i__3].r;
+ q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+ q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i;
+ q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+ y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__4 = jx;
+ q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i =
+ alpha->r * x[i__4].i + alpha->i * x[i__4].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ ix = kx;
+ iy = ky;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ i__4 = iy;
+ i__2 = iy;
+ i__5 = l + i__ + j * a_dim1;
+ q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
+ y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__4 = ix;
+ q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
+ q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ i__3 = jy;
+ i__4 = jy;
+ i__2 = kplus1 + j * a_dim1;
+ r__1 = a[i__2].r;
+ q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+ q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i;
+ q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ jx += *incx;
+ jy += *incy;
+ if (j > *k) {
+ kx += *incx;
+ ky += *incy;
+ }
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when lower triangle of A is stored. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = j;
+ q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
+ alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ i__3 = j;
+ i__4 = j;
+ i__2 = j * a_dim1 + 1;
+ r__1 = a[i__2].r;
+ q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ i__4 = i__;
+ i__2 = i__;
+ i__5 = l + i__ + j * a_dim1;
+ q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
+ y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__4 = i__;
+ q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
+ q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+/* L90: */
+ }
+ i__3 = j;
+ i__4 = j;
+ q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = jx;
+ q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
+ alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ i__3 = jy;
+ i__4 = jy;
+ i__2 = j * a_dim1 + 1;
+ r__1 = a[i__2].r;
+ q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ l = 1 - j;
+ ix = jx;
+ iy = jy;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ ix += *incx;
+ iy += *incy;
+ i__4 = iy;
+ i__2 = iy;
+ i__5 = l + i__ + j * a_dim1;
+ q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
+ y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__4 = ix;
+ q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
+ q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+/* L110: */
+ }
+ i__3 = jy;
+ i__4 = jy;
+ q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ jx += *incx;
+ jy += *incy;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of CHBMV . */
+
+} /* chbmv_ */
+
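
[Editor's note] A call sketch for the translated routine, assuming only the prototype above and the band layout from the header comment. The 2-by-2 hermitian matrix, the values, and `main` are illustrative; the trailing ftnlen argument carries the length of the UPLO string, as in all of these f2c translations.

    #include "datatypes.h"

    extern int chbmv_(char *uplo, integer *n, integer *k, complex *alpha,
                      complex *a, integer *lda, complex *x, integer *incx,
                      complex *beta, complex *y, integer *incy, ftnlen uplo_len);

    int main(void)
    {
        integer n = 2, k = 1, lda = 2, inc = 1;
        complex alpha = {1.f, 0.f}, beta = {0.f, 0.f};
        /* Upper band storage of A = [[2, 1-i], [1+i, 3]]: column 1 holds
           a11 in row k+1; column 2 holds a12 in row 1 and a22 in row 2.
           a[0] sits in the unreferenced top-left triangle. */
        complex a[4] = {{0.f, 0.f}, {2.f, 0.f}, {1.f, -1.f}, {3.f, 0.f}};
        complex x[2] = {{1.f, 0.f}, {1.f, 0.f}};
        complex y[2];  /* beta == 0, so y need not be initialized */
        chbmv_("U", &n, &k, &alpha, a, &lda, x, &inc, &beta, y, &inc, 1);
        return 0;      /* y now holds A*x = (3-i, 4+i) */
    }
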
diff --git a/blas/f2c/chpmv.c b/blas/f2c/chpmv.c
new file mode 100644
index 000000000..65bab1c7f
--- /dev/null
+++ b/blas/f2c/chpmv.c
@@ -0,0 +1,438 @@
+/* chpmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+   on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int chpmv_(char *uplo, integer *n, complex *alpha, complex *
+ ap, complex *x, integer *incx, complex *beta, complex *y, integer *
+ incy, ftnlen uplo_len)
+{
+ /* System generated locals */
+ integer i__1, i__2, i__3, i__4, i__5;
+ real r__1;
+ complex q__1, q__2, q__3, q__4;
+
+ /* Builtin functions */
+ void r_cnjg(complex *, complex *);
+
+ /* Local variables */
+ integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+ complex temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* CHPMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n hermitian matrix, supplied in packed form. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the matrix A is supplied in the packed */
+/* array AP as follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* supplied in AP. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* supplied in AP. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* ALPHA - COMPLEX . */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* AP - COMPLEX array of DIMENSION at least */
+/* ( ( n*( n + 1 ) )/2 ). */
+/* Before entry with UPLO = 'U' or 'u', the array AP must */
+/* contain the upper triangular part of the hermitian matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+/* and a( 2, 2 ) respectively, and so on. */
+/* Before entry with UPLO = 'L' or 'l', the array AP must */
+/* contain the lower triangular part of the hermitian matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+/* and a( 3, 1 ) respectively, and so on. */
+/* Note that the imaginary parts of the diagonal elements need */
+/* not be set and are assumed to be zero. */
+/* Unchanged on exit. */
+
+/* X - COMPLEX array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - COMPLEX . */
+/* On entry, BETA specifies the scalar beta. When BETA is */
+/* supplied as zero then Y need not be set on input. */
+/* Unchanged on exit. */
+
+/* Y - COMPLEX array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the n */
+/* element vector y. On exit, Y is overwritten by the updated */
+/* vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ --y;
+ --x;
+ --ap;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*incx == 0) {
+ info = 6;
+ } else if (*incy == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("CHPMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f &&
+ beta->i == 0.f))) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array AP */
+/* are accessed sequentially with one pass through AP. */
+
+/* First form y := beta*y. */
+
+ if (beta->r != 1.f || beta->i != 0.f) {
+ if (*incy == 1) {
+ if (beta->r == 0.f && beta->i == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ y[i__2].r = 0.f, y[i__2].i = 0.f;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (beta->r == 0.f && beta->i == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ y[i__2].r = 0.f, y[i__2].i = 0.f;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ i__3 = iy;
+ q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (alpha->r == 0.f && alpha->i == 0.f) {
+ return 0;
+ }
+ kk = 1;
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when AP contains the upper triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ k = kk;
+ i__2 = j - 1;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ i__3 = i__;
+ i__4 = i__;
+ i__5 = k;
+ q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ r_cnjg(&q__3, &ap[k]);
+ i__3 = i__;
+ q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+ q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+ ++k;
+/* L50: */
+ }
+ i__2 = j;
+ i__3 = j;
+ i__4 = kk + j - 1;
+ r__1 = ap[i__4].r;
+ q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+ q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
+ q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ kk += j;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = jx;
+ q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ ix = kx;
+ iy = ky;
+ i__2 = kk + j - 2;
+ for (k = kk; k <= i__2; ++k) {
+ i__3 = iy;
+ i__4 = iy;
+ i__5 = k;
+ q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ r_cnjg(&q__3, &ap[k]);
+ i__3 = ix;
+ q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+ q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ i__2 = jy;
+ i__3 = jy;
+ i__4 = kk + j - 1;
+ r__1 = ap[i__4].r;
+ q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+ q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
+ q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ jx += *incx;
+ jy += *incy;
+ kk += j;
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when AP contains the lower triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ i__2 = j;
+ i__3 = j;
+ i__4 = kk;
+ r__1 = ap[i__4].r;
+ q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+ q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ k = kk + 1;
+ i__2 = *n;
+ for (i__ = j + 1; i__ <= i__2; ++i__) {
+ i__3 = i__;
+ i__4 = i__;
+ i__5 = k;
+ q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ r_cnjg(&q__3, &ap[k]);
+ i__3 = i__;
+ q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+ q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+ ++k;
+/* L90: */
+ }
+ i__2 = j;
+ i__3 = j;
+ q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ kk += *n - j + 1;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = jx;
+ q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = q__1.r, temp1.i = q__1.i;
+ temp2.r = 0.f, temp2.i = 0.f;
+ i__2 = jy;
+ i__3 = jy;
+ i__4 = kk;
+ r__1 = ap[i__4].r;
+ q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+ q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ ix = jx;
+ iy = jy;
+ i__2 = kk + *n - j;
+ for (k = kk + 1; k <= i__2; ++k) {
+ ix += *incx;
+ iy += *incy;
+ i__3 = iy;
+ i__4 = iy;
+ i__5 = k;
+ q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+ y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+ r_cnjg(&q__3, &ap[k]);
+ i__3 = ix;
+ q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+ q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+ q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+ temp2.r = q__1.r, temp2.i = q__1.i;
+/* L110: */
+ }
+ i__2 = jy;
+ i__3 = jy;
+ q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+ y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+ jx += *incx;
+ jy += *incy;
+ kk += *n - j + 1;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of CHPMV . */
+
+} /* chpmv_ */
+
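
[Editor's note] The sequential packing described in the AP paragraph above has a closed form, which is handy when preparing inputs for CHPMV. With 1-based i and j, these helpers return the 0-based offset of a(i,j) in AP; the names are ours, not part of the translated BLAS.

    /* Offsets into packed storage, derived from the packing order in the
       header: upper packs columns of length j, lower columns of length n-j+1. */
    static int ap_upper(int i, int j)        /* requires i <= j */
    {
        return (i - 1) + j * (j - 1) / 2;            /* a(1,1)->0, a(1,2)->1, a(2,2)->2 */
    }
    static int ap_lower(int i, int j, int n) /* requires i >= j */
    {
        return (i - 1) + (2 * n - j) * (j - 1) / 2;  /* a(1,1)->0, a(2,1)->1, ... */
    }
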
diff --git a/blas/f2c/complexdots.c b/blas/f2c/complexdots.c
new file mode 100644
index 000000000..a856a231c
--- /dev/null
+++ b/blas/f2c/complexdots.c
@@ -0,0 +1,84 @@
+/* This file has been modified to use the standard gfortran calling
+ convention, rather than the f2c calling convention.
+
+ It does not require -ff2c when compiled with gfortran.
+*/
+
+/* complexdots.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+   on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+complex cdotc_(integer *n, complex *cx, integer
+ *incx, complex *cy, integer *incy)
+{
+ complex res;
+ extern /* Subroutine */ int cdotcw_(integer *, complex *, integer *,
+ complex *, integer *, complex *);
+
+ /* Parameter adjustments */
+ --cy;
+ --cx;
+
+ /* Function Body */
+ cdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
+ return res;
+} /* cdotc_ */
+
+complex cdotu_(integer *n, complex *cx, integer
+ *incx, complex *cy, integer *incy)
+{
+ complex res;
+ extern /* Subroutine */ int cdotuw_(integer *, complex *, integer *,
+ complex *, integer *, complex *);
+
+ /* Parameter adjustments */
+ --cy;
+ --cx;
+
+ /* Function Body */
+ cdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
+ return res;
+} /* cdotu_ */
+
+doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx,
+ doublecomplex *cy, integer *incy)
+{
+ doublecomplex res;
+ extern /* Subroutine */ int zdotcw_(integer *, doublecomplex *, integer *,
+ doublecomplex *, integer *, doublecomplex *);
+
+ /* Parameter adjustments */
+ --cy;
+ --cx;
+
+ /* Function Body */
+ zdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
+ return res;
+} /* zdotc_ */
+
+doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx,
+ doublecomplex *cy, integer *incy)
+{
+ doublecomplex res;
+ extern /* Subroutine */ int zdotuw_(integer *, doublecomplex *, integer *,
+ doublecomplex *, integer *, doublecomplex *);
+
+ /* Parameter adjustments */
+ --cy;
+ --cx;
+
+ /* Function Body */
+ zdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
+ return res;
+} /* zdotu_ */
+
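
[Editor's note] These wrappers exist because gfortran, without -ff2c, returns COMPLEX function results by value, whereas f2c-translated callers expect them through a hidden pointer argument; each wrapper simply forwards to a *w_ subroutine defined elsewhere in the Eigen BLAS. A sketch of what cdotcw_ plausibly computes, using the extern signature declared above; the body is our assumption, not the shipped implementation.

    #include "datatypes.h"

    /* Sketch: res = sum over i of conjg(cx[i]) * cy[i], with the usual
       BLAS start-point rule for negative strides. */
    int cdotcw_(integer *n, complex *cx, integer *incx,
                complex *cy, integer *incy, complex *res)
    {
        integer i, ix = 0, iy = 0;
        res->r = 0.f, res->i = 0.f;
        if (*n <= 0) return 0;
        if (*incx < 0) ix = (1 - *n) * *incx;
        if (*incy < 0) iy = (1 - *n) * *incy;
        for (i = 0; i < *n; ++i, ix += *incx, iy += *incy) {
            res->r += cx[ix].r * cy[iy].r + cx[ix].i * cy[iy].i;
            res->i += cx[ix].r * cy[iy].i - cx[ix].i * cy[iy].r;
        }
        return 0;
    }
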
diff --git a/blas/f2c/ctbmv.c b/blas/f2c/ctbmv.c
new file mode 100644
index 000000000..790fd581f
--- /dev/null
+++ b/blas/f2c/ctbmv.c
@@ -0,0 +1,647 @@
+/* ctbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+   on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int ctbmv_(char *uplo, char *trans, char *diag, integer *n,
+ integer *k, complex *a, integer *lda, complex *x, integer *incx,
+ ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+ complex q__1, q__2, q__3;
+
+ /* Builtin functions */
+ void r_cnjg(complex *, complex *);
+
+ /* Local variables */
+ integer i__, j, l, ix, jx, kx, info;
+ complex temp;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+ logical noconj, nounit;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* CTBMV performs one of the matrix-vector operations */
+
+/* x := A*x, or x := A'*x, or x := conjg( A' )*x, */
+
+/* where x is an n element vector and A is an n by n unit, or non-unit, */
+/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the matrix is an upper or */
+/* lower triangular matrix as follows: */
+
+/* UPLO = 'U' or 'u' A is an upper triangular matrix. */
+
+/* UPLO = 'L' or 'l' A is a lower triangular matrix. */
+
+/* Unchanged on exit. */
+
+/* TRANS - CHARACTER*1. */
+/* On entry, TRANS specifies the operation to be performed as */
+/* follows: */
+
+/* TRANS = 'N' or 'n' x := A*x. */
+
+/* TRANS = 'T' or 't' x := A'*x. */
+
+/* TRANS = 'C' or 'c' x := conjg( A' )*x. */
+
+/* Unchanged on exit. */
+
+/* DIAG - CHARACTER*1. */
+/* On entry, DIAG specifies whether or not A is unit */
+/* triangular as follows: */
+
+/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */
+
+/* DIAG = 'N' or 'n' A is not assumed to be unit */
+/* triangular. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry with UPLO = 'U' or 'u', K specifies the number of */
+/* super-diagonals of the matrix A. */
+/* On entry with UPLO = 'L' or 'l', K specifies the number of */
+/* sub-diagonals of the matrix A. */
+/* K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* A - COMPLEX array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer an upper */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer a lower */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Note that when DIAG = 'U' or 'u' the elements of the array A */
+/* corresponding to the diagonal elements of the matrix are not */
+/* referenced, but are assumed to be unity. */
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - COMPLEX array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. On exit, X is overwritten with the */
+/*           transformed vector x. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans,
+ "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
+ ftnlen)1)) {
+ info = 2;
+ } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag,
+ "N", (ftnlen)1, (ftnlen)1)) {
+ info = 3;
+ } else if (*n < 0) {
+ info = 4;
+ } else if (*k < 0) {
+ info = 5;
+ } else if (*lda < *k + 1) {
+ info = 7;
+ } else if (*incx == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("CTBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+ noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
+ nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+
+/* Set up the start point in X if the increment is not unity. This */
+/* will be ( N - 1 )*INCX too small for descending loops. */
+
+ if (*incx <= 0) {
+ kx = 1 - (*n - 1) * *incx;
+ } else if (*incx != 1) {
+ kx = 1;
+ }
+
+/* Start the operations. In this version the elements of A are */
+/* accessed sequentially with one pass through A. */
+
+ if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+
+/* Form x := A*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
+ i__2 = j;
+ temp.r = x[i__2].r, temp.i = x[i__2].i;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ i__5 = l + i__ + j * a_dim1;
+ q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+ q__2.i = temp.r * a[i__5].i + temp.i * a[
+ i__5].r;
+ q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i +
+ q__2.i;
+ x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+/* L10: */
+ }
+ if (nounit) {
+ i__4 = j;
+ i__2 = j;
+ i__3 = kplus1 + j * a_dim1;
+ q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
+ i__3].i, q__1.i = x[i__2].r * a[i__3].i +
+ x[i__2].i * a[i__3].r;
+ x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+ }
+ }
+/* L20: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__4 = jx;
+ if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
+ i__4 = jx;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ ix = kx;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ i__4 = ix;
+ i__2 = ix;
+ i__5 = l + i__ + j * a_dim1;
+ q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+ q__2.i = temp.r * a[i__5].i + temp.i * a[
+ i__5].r;
+ q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i +
+ q__2.i;
+ x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+ ix += *incx;
+/* L30: */
+ }
+ if (nounit) {
+ i__3 = jx;
+ i__4 = jx;
+ i__2 = kplus1 + j * a_dim1;
+ q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
+ i__2].i, q__1.i = x[i__4].r * a[i__2].i +
+ x[i__4].i * a[i__2].r;
+ x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+ }
+ }
+ jx += *incx;
+ if (j > *k) {
+ kx += *incx;
+ }
+/* L40: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ i__1 = j;
+ if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
+ i__1 = j;
+ temp.r = x[i__1].r, temp.i = x[i__1].i;
+ l = 1 - j;
+/* Computing MIN */
+ i__1 = *n, i__3 = j + *k;
+ i__4 = j + 1;
+ for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
+ i__1 = i__;
+ i__3 = i__;
+ i__2 = l + i__ + j * a_dim1;
+ q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+ q__2.i = temp.r * a[i__2].i + temp.i * a[
+ i__2].r;
+ q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i +
+ q__2.i;
+ x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+/* L50: */
+ }
+ if (nounit) {
+ i__4 = j;
+ i__1 = j;
+ i__3 = j * a_dim1 + 1;
+ q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
+ i__3].i, q__1.i = x[i__1].r * a[i__3].i +
+ x[i__1].i * a[i__3].r;
+ x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+ }
+ }
+/* L60: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ i__4 = jx;
+ if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
+ i__4 = jx;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ ix = kx;
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__1 = j + *k;
+ i__3 = j + 1;
+ for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
+ i__4 = ix;
+ i__1 = ix;
+ i__2 = l + i__ + j * a_dim1;
+ q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+ q__2.i = temp.r * a[i__2].i + temp.i * a[
+ i__2].r;
+ q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i +
+ q__2.i;
+ x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+ ix -= *incx;
+/* L70: */
+ }
+ if (nounit) {
+ i__3 = jx;
+ i__4 = jx;
+ i__1 = j * a_dim1 + 1;
+ q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
+ i__1].i, q__1.i = x[i__4].r * a[i__1].i +
+ x[i__4].i * a[i__1].r;
+ x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+ }
+ }
+ jx -= *incx;
+ if (*n - j >= *k) {
+ kx -= *incx;
+ }
+/* L80: */
+ }
+ }
+ }
+ } else {
+
+/* Form x := A'*x or x := conjg( A' )*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ i__3 = j;
+ temp.r = x[i__3].r, temp.i = x[i__3].i;
+ l = kplus1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__3 = kplus1 + j * a_dim1;
+ q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+ q__1.i = temp.r * a[i__3].i + temp.i * a[
+ i__3].r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ i__4 = l + i__ + j * a_dim1;
+ i__1 = i__;
+ q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
+ i__1].i, q__2.i = a[i__4].r * x[i__1].i +
+ a[i__4].i * x[i__1].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+/* L90: */
+ }
+ } else {
+ if (nounit) {
+ r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
+ q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+ q__1.i = temp.r * q__2.i + temp.i *
+ q__2.r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__4 = i__;
+ q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i,
+ q__2.i = q__3.r * x[i__4].i + q__3.i * x[
+ i__4].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+/* L100: */
+ }
+ }
+ i__3 = j;
+ x[i__3].r = temp.r, x[i__3].i = temp.i;
+/* L110: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ i__3 = jx;
+ temp.r = x[i__3].r, temp.i = x[i__3].i;
+ kx -= *incx;
+ ix = kx;
+ l = kplus1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__3 = kplus1 + j * a_dim1;
+ q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+ q__1.i = temp.r * a[i__3].i + temp.i * a[
+ i__3].r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ i__4 = l + i__ + j * a_dim1;
+ i__1 = ix;
+ q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
+ i__1].i, q__2.i = a[i__4].r * x[i__1].i +
+ a[i__4].i * x[i__1].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+ ix -= *incx;
+/* L120: */
+ }
+ } else {
+ if (nounit) {
+ r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
+ q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+ q__1.i = temp.r * q__2.i + temp.i *
+ q__2.r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__4 = ix;
+ q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i,
+ q__2.i = q__3.r * x[i__4].i + q__3.i * x[
+ i__4].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+ ix -= *incx;
+/* L130: */
+ }
+ }
+ i__3 = jx;
+ x[i__3].r = temp.r, x[i__3].i = temp.i;
+ jx -= *incx;
+/* L140: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ i__4 = j;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ l = 1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__4 = j * a_dim1 + 1;
+ q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+ q__1.i = temp.r * a[i__4].i + temp.i * a[
+ i__4].r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ i__1 = l + i__ + j * a_dim1;
+ i__2 = i__;
+ q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+ i__2].i, q__2.i = a[i__1].r * x[i__2].i +
+ a[i__1].i * x[i__2].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+/* L150: */
+ }
+ } else {
+ if (nounit) {
+ r_cnjg(&q__2, &a[j * a_dim1 + 1]);
+ q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+ q__1.i = temp.r * q__2.i + temp.i *
+ q__2.r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__1 = i__;
+ q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i,
+ q__2.i = q__3.r * x[i__1].i + q__3.i * x[
+ i__1].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+/* L160: */
+ }
+ }
+ i__4 = j;
+ x[i__4].r = temp.r, x[i__4].i = temp.i;
+/* L170: */
+ }
+ } else {
+ jx = kx;
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ i__4 = jx;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ kx += *incx;
+ ix = kx;
+ l = 1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__4 = j * a_dim1 + 1;
+ q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+ q__1.i = temp.r * a[i__4].i + temp.i * a[
+ i__4].r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ i__1 = l + i__ + j * a_dim1;
+ i__2 = ix;
+ q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+ i__2].i, q__2.i = a[i__1].r * x[i__2].i +
+ a[i__1].i * x[i__2].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+ ix += *incx;
+/* L180: */
+ }
+ } else {
+ if (nounit) {
+ r_cnjg(&q__2, &a[j * a_dim1 + 1]);
+ q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+ q__1.i = temp.r * q__2.i + temp.i *
+ q__2.r;
+ temp.r = q__1.r, temp.i = q__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+ i__1 = ix;
+ q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i,
+ q__2.i = q__3.r * x[i__1].i + q__3.i * x[
+ i__1].r;
+ q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+ q__2.i;
+ temp.r = q__1.r, temp.i = q__1.i;
+ ix += *incx;
+/* L190: */
+ }
+ }
+ i__4 = jx;
+ x[i__4].r = temp.r, x[i__4].i = temp.i;
+ jx += *incx;
+/* L200: */
+ }
+ }
+ }
+ }
+
+ return 0;
+
+/* End of CTBMV . */
+
+} /* ctbmv_ */
+
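
[Editor's note] Unlike the real DTBMV deleted above, the complex translation distinguishes TRANS = 'T' (plain transpose, the noconj path) from TRANS = 'C' (conjugate transpose). A degenerate call sketch for the 'C' case, mainly to show the argument shapes; it assumes r_cnjg is available at link time, and the values are illustrative only.

    #include "datatypes.h"

    extern int ctbmv_(char *uplo, char *trans, char *diag, integer *n,
                      integer *k, complex *a, integer *lda, complex *x,
                      integer *incx, ftnlen ul, ftnlen tl, ftnlen dl);

    int main(void)
    {
        integer n = 1, k = 0, lda = 1, inc = 1;
        complex a[1] = {{0.f, 1.f}};  /* A = [i], a 1x1 "band" matrix */
        complex x[1] = {{2.f, 0.f}};
        ctbmv_("U", "C", "N", &n, &k, a, &lda, x, &inc, 1, 1, 1);
        return 0;                     /* x := conjg(A')*x = -2i */
    }
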
diff --git a/blas/f2c/d_cnjg.c b/blas/f2c/d_cnjg.c
new file mode 100644
index 000000000..623090c6b
--- /dev/null
+++ b/blas/f2c/d_cnjg.c
@@ -0,0 +1,6 @@
+#include "datatypes.h"
+
+void d_cnjg(doublecomplex *r, doublecomplex *z) {
+ r->r = z->r;
+ r->i = -(z->i);
+}
diff --git a/blas/f2c/datatypes.h b/blas/f2c/datatypes.h
new file mode 100644
index 000000000..63232b246
--- /dev/null
+++ b/blas/f2c/datatypes.h
@@ -0,0 +1,24 @@
+/* This contains a limited subset of the typedefs exposed by f2c
+ for use by the Eigen BLAS C-only implementation.
+*/
+
+#ifndef __EIGEN_DATATYPES_H__
+#define __EIGEN_DATATYPES_H__
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+typedef int ftnlen;
+typedef int logical;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (doublereal)abs(x)
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (doublereal)min(a,b)
+#define dmax(a,b) (doublereal)max(a,b)
+
+#endif
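
[Editor's note] One consequence of this header worth noting: abs, min, max and their d-prefixed variants are plain macros, so they evaluate their arguments more than once, just as in f2c's own f2c.h. A small sketch using only names defined above:

    #include "datatypes.h"

    /* |z|^2 for an f2c-style single-precision complex. */
    static real cabs2(const complex *z)
    {
        return z->r * z->r + z->i * z->i;
    }

    /* Clamp to [0, 1]; dmin/dmax duplicate their arguments, so avoid
       passing expressions with side effects. */
    static doublereal clamp01(doublereal x)
    {
        return dmin(dmax(x, 0.), 1.);
    }
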
diff --git a/blas/f2c/drotm.c b/blas/f2c/drotm.c
new file mode 100644
index 000000000..17a779b74
--- /dev/null
+++ b/blas/f2c/drotm.c
@@ -0,0 +1,215 @@
+/* drotm.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+   on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int drotm_(integer *n, doublereal *dx, integer *incx,
+ doublereal *dy, integer *incy, doublereal *dparam)
+{
+ /* Initialized data */
+
+ static doublereal zero = 0.;
+ static doublereal two = 2.;
+
+ /* System generated locals */
+ integer i__1, i__2;
+
+ /* Local variables */
+ integer i__;
+ doublereal w, z__;
+ integer kx, ky;
+ doublereal dh11, dh12, dh21, dh22, dflag;
+ integer nsteps;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
+
+/* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */
+/* (DY**T) */
+
+/* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
+/*     LX = (-INCX)*N, AND SIMILARLY FOR DY USING LY AND INCY. */
+/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+
+/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */
+
+/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */
+/* H=( ) ( ) ( ) ( ) */
+/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */
+/* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */
+
+/* Arguments */
+/* ========= */
+
+/* N (input) INTEGER */
+/* number of elements in input vector(s) */
+
+/* DX (input/output) DOUBLE PRECISION array, dimension N */
+/* double precision vector with N elements */
+
+/* INCX (input) INTEGER */
+/* storage spacing between elements of DX */
+
+/* DY (input/output) DOUBLE PRECISION array, dimension N */
+/* double precision vector with N elements */
+
+/* INCY (input) INTEGER */
+/* storage spacing between elements of DY */
+
+/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */
+/* DPARAM(1)=DFLAG */
+/* DPARAM(2)=DH11 */
+/* DPARAM(3)=DH21 */
+/* DPARAM(4)=DH12 */
+/* DPARAM(5)=DH22 */
+
+/* ===================================================================== */
+
+/* .. Local Scalars .. */
+/* .. */
+/* .. Data statements .. */
+ /* Parameter adjustments */
+ --dparam;
+ --dy;
+ --dx;
+
+ /* Function Body */
+/* .. */
+
+ dflag = dparam[1];
+ if (*n <= 0 || dflag + two == zero) {
+ goto L140;
+ }
+ if (! (*incx == *incy && *incx > 0)) {
+ goto L70;
+ }
+
+ nsteps = *n * *incx;
+ if (dflag < 0.) {
+ goto L50;
+ } else if (dflag == 0) {
+ goto L10;
+ } else {
+ goto L30;
+ }
+L10:
+ dh12 = dparam[4];
+ dh21 = dparam[3];
+ i__1 = nsteps;
+ i__2 = *incx;
+ for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+ w = dx[i__];
+ z__ = dy[i__];
+ dx[i__] = w + z__ * dh12;
+ dy[i__] = w * dh21 + z__;
+/* L20: */
+ }
+ goto L140;
+L30:
+ dh11 = dparam[2];
+ dh22 = dparam[5];
+ i__2 = nsteps;
+ i__1 = *incx;
+ for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+ w = dx[i__];
+ z__ = dy[i__];
+ dx[i__] = w * dh11 + z__;
+ dy[i__] = -w + dh22 * z__;
+/* L40: */
+ }
+ goto L140;
+L50:
+ dh11 = dparam[2];
+ dh12 = dparam[4];
+ dh21 = dparam[3];
+ dh22 = dparam[5];
+ i__1 = nsteps;
+ i__2 = *incx;
+ for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+ w = dx[i__];
+ z__ = dy[i__];
+ dx[i__] = w * dh11 + z__ * dh12;
+ dy[i__] = w * dh21 + z__ * dh22;
+/* L60: */
+ }
+ goto L140;
+L70:
+ kx = 1;
+ ky = 1;
+ if (*incx < 0) {
+ kx = (1 - *n) * *incx + 1;
+ }
+ if (*incy < 0) {
+ ky = (1 - *n) * *incy + 1;
+ }
+
+ if (dflag < 0.) {
+ goto L120;
+ } else if (dflag == 0) {
+ goto L80;
+ } else {
+ goto L100;
+ }
+L80:
+ dh12 = dparam[4];
+ dh21 = dparam[3];
+ i__2 = *n;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ w = dx[kx];
+ z__ = dy[ky];
+ dx[kx] = w + z__ * dh12;
+ dy[ky] = w * dh21 + z__;
+ kx += *incx;
+ ky += *incy;
+/* L90: */
+ }
+ goto L140;
+L100:
+ dh11 = dparam[2];
+ dh22 = dparam[5];
+ i__2 = *n;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ w = dx[kx];
+ z__ = dy[ky];
+ dx[kx] = w * dh11 + z__;
+ dy[ky] = -w + dh22 * z__;
+ kx += *incx;
+ ky += *incy;
+/* L110: */
+ }
+ goto L140;
+L120:
+ dh11 = dparam[2];
+ dh12 = dparam[4];
+ dh21 = dparam[3];
+ dh22 = dparam[5];
+ i__2 = *n;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ w = dx[kx];
+ z__ = dy[ky];
+ dx[kx] = w * dh11 + z__ * dh12;
+ dy[ky] = w * dh21 + z__ * dh22;
+ kx += *incx;
+ ky += *incy;
+/* L130: */
+ }
+L140:
+ return 0;
+} /* drotm_ */
+
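
[Editor's note] DPARAM packs the flag and the non-trivial entries of H into five doubles, so applying a full 2-by-2 transform means DFLAG = -1 with all four DH entries set. A usage sketch with arbitrary illustrative values (not a matrix produced by DROTMG):

    #include "datatypes.h"

    extern int drotm_(integer *n, doublereal *dx, integer *incx,
                      doublereal *dy, integer *incy, doublereal *dparam);

    int main(void)
    {
        integer n = 3, inc = 1;
        doublereal x[3] = {1., 2., 3.};
        doublereal y[3] = {4., 5., 6.};
        /* DFLAG = -1: H = [DH11 DH12; DH21 DH22] is used in full. */
        doublereal dparam[5] = {-1., 2., 0.5, -0.5, 1.};
        drotm_(&n, x, &inc, y, &inc, dparam);
        /* Each pair is now x' = 2*x - 0.5*y, y' = 0.5*x + y. */
        return 0;
    }
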
diff --git a/blas/f2c/drotmg.c b/blas/f2c/drotmg.c
new file mode 100644
index 000000000..a63eb1083
--- /dev/null
+++ b/blas/f2c/drotmg.c
@@ -0,0 +1,293 @@
+/* drotmg.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+   on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int drotmg_(doublereal *dd1, doublereal *dd2, doublereal *
+ dx1, doublereal *dy1, doublereal *dparam)
+{
+ /* Initialized data */
+
+ static doublereal zero = 0.;
+ static doublereal one = 1.;
+ static doublereal two = 2.;
+ static doublereal gam = 4096.;
+ static doublereal gamsq = 16777216.;
+ static doublereal rgamsq = 5.9604645e-8;
+
+ /* Format strings */
+ static char fmt_120[] = "";
+ static char fmt_150[] = "";
+ static char fmt_180[] = "";
+ static char fmt_210[] = "";
+
+ /* System generated locals */
+ doublereal d__1;
+
+ /* Local variables */
+ doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22;
+ integer igo;
+ doublereal dflag, dtemp;
+
+ /* Assigned format variables */
+ static char *igo_fmt;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
+/* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */
+/* DY1)**T. */
+/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+
+/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */
+
+/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */
+/* H=( ) ( ) ( ) ( ) */
+/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */
+/* LOCATIONS 2-5 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */
+/* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */
+/* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */
+
+/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
+/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
+/* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
+
+
+/* Arguments */
+/* ========= */
+
+/* DD1 (input/output) DOUBLE PRECISION */
+
+/* DD2 (input/output) DOUBLE PRECISION */
+
+/* DX1 (input/output) DOUBLE PRECISION */
+
+/* DY1 (input) DOUBLE PRECISION */
+
+/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */
+/* DPARAM(1)=DFLAG */
+/* DPARAM(2)=DH11 */
+/* DPARAM(3)=DH21 */
+/* DPARAM(4)=DH12 */
+/* DPARAM(5)=DH22 */
+
+/* ===================================================================== */
+
+/* .. Local Scalars .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Data statements .. */
+
+ /* Parameter adjustments */
+ --dparam;
+
+ /* Function Body */
+/* .. */
+ if (! (*dd1 < zero)) {
+ goto L10;
+ }
+/* GO ZERO-H-D-AND-DX1.. */
+ goto L60;
+L10:
+/* CASE-DD1-NONNEGATIVE */
+ dp2 = *dd2 * *dy1;
+ if (! (dp2 == zero)) {
+ goto L20;
+ }
+ dflag = -two;
+ goto L260;
+/* REGULAR-CASE.. */
+L20:
+ dp1 = *dd1 * *dx1;
+ dq2 = dp2 * *dy1;
+ dq1 = dp1 * *dx1;
+
+ if (! (abs(dq1) > abs(dq2))) {
+ goto L40;
+ }
+ dh21 = -(*dy1) / *dx1;
+ dh12 = dp2 / dp1;
+
+ du = one - dh12 * dh21;
+
+ if (! (du <= zero)) {
+ goto L30;
+ }
+/* GO ZERO-H-D-AND-DX1.. */
+ goto L60;
+L30:
+ dflag = zero;
+ *dd1 /= du;
+ *dd2 /= du;
+ *dx1 *= du;
+/* GO SCALE-CHECK.. */
+ goto L100;
+L40:
+ if (! (dq2 < zero)) {
+ goto L50;
+ }
+/* GO ZERO-H-D-AND-DX1.. */
+ goto L60;
+L50:
+ dflag = one;
+ dh11 = dp1 / dp2;
+ dh22 = *dx1 / *dy1;
+ du = one + dh11 * dh22;
+ dtemp = *dd2 / du;
+ *dd2 = *dd1 / du;
+ *dd1 = dtemp;
+ *dx1 = *dy1 * du;
+/* GO SCALE-CHECK */
+ goto L100;
+/* PROCEDURE..ZERO-H-D-AND-DX1.. */
+L60:
+ dflag = -one;
+ dh11 = zero;
+ dh12 = zero;
+ dh21 = zero;
+ dh22 = zero;
+
+ *dd1 = zero;
+ *dd2 = zero;
+ *dx1 = zero;
+/* RETURN.. */
+ goto L220;
+/* PROCEDURE..FIX-H.. */
+L70:
+ if (! (dflag >= zero)) {
+ goto L90;
+ }
+
+ if (! (dflag == zero)) {
+ goto L80;
+ }
+ dh11 = one;
+ dh22 = one;
+ dflag = -one;
+ goto L90;
+L80:
+ dh21 = -one;
+ dh12 = one;
+ dflag = -one;
+L90:
+ switch (igo) {
+ case 0: goto L120;
+ case 1: goto L150;
+ case 2: goto L180;
+ case 3: goto L210;
+ }
+/* PROCEDURE..SCALE-CHECK */
+L100:
+L110:
+ if (! (*dd1 <= rgamsq)) {
+ goto L130;
+ }
+ if (*dd1 == zero) {
+ goto L160;
+ }
+ igo = 0;
+ igo_fmt = fmt_120;
+/* FIX-H.. */
+ goto L70;
+L120:
+/* Computing 2nd power */
+ d__1 = gam;
+ *dd1 *= d__1 * d__1;
+ *dx1 /= gam;
+ dh11 /= gam;
+ dh12 /= gam;
+ goto L110;
+L130:
+L140:
+ if (! (*dd1 >= gamsq)) {
+ goto L160;
+ }
+ igo = 1;
+ igo_fmt = fmt_150;
+/* FIX-H.. */
+ goto L70;
+L150:
+/* Computing 2nd power */
+ d__1 = gam;
+ *dd1 /= d__1 * d__1;
+ *dx1 *= gam;
+ dh11 *= gam;
+ dh12 *= gam;
+ goto L140;
+L160:
+L170:
+ if (! (abs(*dd2) <= rgamsq)) {
+ goto L190;
+ }
+ if (*dd2 == zero) {
+ goto L220;
+ }
+ igo = 2;
+ igo_fmt = fmt_180;
+/* FIX-H.. */
+ goto L70;
+L180:
+/* Computing 2nd power */
+ d__1 = gam;
+ *dd2 *= d__1 * d__1;
+ dh21 /= gam;
+ dh22 /= gam;
+ goto L170;
+L190:
+L200:
+ if (! (abs(*dd2) >= gamsq)) {
+ goto L220;
+ }
+ igo = 3;
+ igo_fmt = fmt_210;
+/* FIX-H.. */
+ goto L70;
+L210:
+/* Computing 2nd power */
+ d__1 = gam;
+ *dd2 /= d__1 * d__1;
+ dh21 *= gam;
+ dh22 *= gam;
+ goto L200;
+L220:
+ if (dflag < 0.) {
+ goto L250;
+ } else if (dflag == 0) {
+ goto L230;
+ } else {
+ goto L240;
+ }
+L230:
+ dparam[3] = dh21;
+ dparam[4] = dh12;
+ goto L260;
+L240:
+ dparam[2] = dh11;
+ dparam[5] = dh22;
+ goto L260;
+L250:
+ dparam[2] = dh11;
+ dparam[3] = dh21;
+ dparam[4] = dh12;
+ dparam[5] = dh22;
+L260:
+ dparam[1] = dflag;
+ return 0;
+} /* drotmg_ */
+
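A sketch pairing the two translated routines (same datatypes.h assumptions):
drotmg_ encodes the rotation in DPARAM and drotm_ applies it, zeroing the
second component of the chosen column:

#include "datatypes.h"

extern int drotmg_(doublereal *dd1, doublereal *dd2, doublereal *dx1,
                   doublereal *dy1, doublereal *dparam);
extern int drotm_(integer *n, doublereal *dx, integer *incx,
                  doublereal *dy, integer *incy, doublereal *dparam);

int main(void) {
    doublereal d1 = 1., d2 = 1., x1 = 3., y1 = 4., param[5];
    doublereal x[2] = {3., 1.}, y[2] = {4., 2.};
    integer n = 2, inc = 1;

    /* Build H so the second component of (sqrt(d1)*x1, sqrt(d2)*y1)^T
       vanishes; the form taken is recorded in param[0] (DFLAG). */
    drotmg_(&d1, &d2, &x1, &y1, param);

    /* Apply the same H to the 2-by-2 matrix (x^T; y^T): the first
       column was (3, 4), so afterwards y[0] == 0. */
    drotm_(&n, x, &inc, y, &inc, param);
    return 0;
}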
diff --git a/blas/f2c/dsbmv.c b/blas/f2c/dsbmv.c
new file mode 100644
index 000000000..c6b4b21d6
--- /dev/null
+++ b/blas/f2c/dsbmv.c
@@ -0,0 +1,366 @@
+/* dsbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int dsbmv_(char *uplo, integer *n, integer *k, doublereal *
+ alpha, doublereal *a, integer *lda, doublereal *x, integer *incx,
+ doublereal *beta, doublereal *y, integer *incy, ftnlen uplo_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+ /* Local variables */
+ integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+ doublereal temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DSBMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n symmetric band matrix, with k super-diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the band matrix A is being supplied as */
+/* follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* being supplied. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* being supplied. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry, K specifies the number of super-diagonals of the */
+/* matrix A. K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* ALPHA - DOUBLE PRECISION. */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the symmetric matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer the upper */
+/* triangular part of a symmetric band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the symmetric matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer the lower */
+/* triangular part of a symmetric band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - DOUBLE PRECISION array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the */
+/* vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - DOUBLE PRECISION. */
+/* On entry, BETA specifies the scalar beta. */
+/* Unchanged on exit. */
+
+/* Y - DOUBLE PRECISION array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the */
+/* vector y. On exit, Y is overwritten by the updated vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+ --y;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*k < 0) {
+ info = 3;
+ } else if (*lda < *k + 1) {
+ info = 6;
+ } else if (*incx == 0) {
+ info = 8;
+ } else if (*incy == 0) {
+ info = 11;
+ }
+ if (info != 0) {
+ xerbla_("DSBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array A */
+/* are accessed sequentially with one pass through A. */
+
+/* First form y := beta*y. */
+
+ if (*beta != 1.) {
+ if (*incy == 1) {
+ if (*beta == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = 0.;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = *beta * y[i__];
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (*beta == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = 0.;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = *beta * y[iy];
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (*alpha == 0.) {
+ return 0;
+ }
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when upper triangle of A is stored. */
+
+ kplus1 = *k + 1;
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ y[i__] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[i__];
+/* L50: */
+ }
+ y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.;
+ ix = kx;
+ iy = ky;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ y[iy] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[ix];
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha *
+ temp2;
+ jx += *incx;
+ jy += *incy;
+ if (j > *k) {
+ kx += *incx;
+ ky += *incy;
+ }
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when lower triangle of A is stored. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.;
+ y[j] += temp1 * a[j * a_dim1 + 1];
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ y[i__] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[i__];
+/* L90: */
+ }
+ y[j] += *alpha * temp2;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.;
+ y[jy] += temp1 * a[j * a_dim1 + 1];
+ l = 1 - j;
+ ix = jx;
+ iy = jy;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ ix += *incx;
+ iy += *incy;
+ y[iy] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[ix];
+/* L110: */
+ }
+ y[jy] += *alpha * temp2;
+ jx += *incx;
+ jy += *incy;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of DSBMV . */
+
+} /* dsbmv_ */
+
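The Fortran packing segment in the header comment translates to the
following 0-based C sketch; pack_upper_band is a hypothetical helper, not
part of this port, and a column-major full matrix is assumed:

#include "datatypes.h"

/* Pack the upper triangle of a column-major n-by-n symmetric matrix
   `full` into BLAS band storage `a` (leading dimension lda >= k+1).
   The diagonal lands in 0-based row k, matching the Fortran
   segment's row K+1. */
void pack_upper_band(integer n, integer k, const doublereal *full,
                     doublereal *a, integer lda) {
    integer i, j;
    for (j = 0; j < n; ++j) {
        integer m = k - j;                     /* row offset for column j */
        integer i0 = (j - k > 0) ? j - k : 0;  /* skip the unreferenced triangle */
        for (i = i0; i <= j; ++i)
            a[(m + i) + j * lda] = full[i + j * n];
    }
}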
diff --git a/blas/f2c/dspmv.c b/blas/f2c/dspmv.c
new file mode 100644
index 000000000..0b4e92d5c
--- /dev/null
+++ b/blas/f2c/dspmv.c
@@ -0,0 +1,316 @@
+/* dspmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int dspmv_(char *uplo, integer *n, doublereal *alpha,
+ doublereal *ap, doublereal *x, integer *incx, doublereal *beta,
+ doublereal *y, integer *incy, ftnlen uplo_len)
+{
+ /* System generated locals */
+ integer i__1, i__2;
+
+ /* Local variables */
+ integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+ doublereal temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DSPMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n symmetric matrix, supplied in packed form. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the matrix A is supplied in the packed */
+/* array AP as follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* supplied in AP. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* supplied in AP. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* ALPHA - DOUBLE PRECISION. */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* AP - DOUBLE PRECISION array of DIMENSION at least */
+/* ( ( n*( n + 1 ) )/2 ). */
+/* Before entry with UPLO = 'U' or 'u', the array AP must */
+/* contain the upper triangular part of the symmetric matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+/* and a( 2, 2 ) respectively, and so on. */
+/* Before entry with UPLO = 'L' or 'l', the array AP must */
+/* contain the lower triangular part of the symmetric matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+/* and a( 3, 1 ) respectively, and so on. */
+/* Unchanged on exit. */
+
+/* X - DOUBLE PRECISION array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - DOUBLE PRECISION. */
+/* On entry, BETA specifies the scalar beta. When BETA is */
+/* supplied as zero then Y need not be set on input. */
+/* Unchanged on exit. */
+
+/* Y - DOUBLE PRECISION array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the n */
+/* element vector y. On exit, Y is overwritten by the updated */
+/* vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ --y;
+ --x;
+ --ap;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*incx == 0) {
+ info = 6;
+ } else if (*incy == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("DSPMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array AP */
+/* are accessed sequentially with one pass through AP. */
+
+/* First form y := beta*y. */
+
+ if (*beta != 1.) {
+ if (*incy == 1) {
+ if (*beta == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = 0.;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = *beta * y[i__];
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (*beta == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = 0.;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = *beta * y[iy];
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (*alpha == 0.) {
+ return 0;
+ }
+ kk = 1;
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when AP contains the upper triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.;
+ k = kk;
+ i__2 = j - 1;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ y[i__] += temp1 * ap[k];
+ temp2 += ap[k] * x[i__];
+ ++k;
+/* L50: */
+ }
+ y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+ kk += j;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.;
+ ix = kx;
+ iy = ky;
+ i__2 = kk + j - 2;
+ for (k = kk; k <= i__2; ++k) {
+ y[iy] += temp1 * ap[k];
+ temp2 += ap[k] * x[ix];
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+ jx += *incx;
+ jy += *incy;
+ kk += j;
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when AP contains the lower triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.;
+ y[j] += temp1 * ap[kk];
+ k = kk + 1;
+ i__2 = *n;
+ for (i__ = j + 1; i__ <= i__2; ++i__) {
+ y[i__] += temp1 * ap[k];
+ temp2 += ap[k] * x[i__];
+ ++k;
+/* L90: */
+ }
+ y[j] += *alpha * temp2;
+ kk += *n - j + 1;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.;
+ y[jy] += temp1 * ap[kk];
+ ix = jx;
+ iy = jy;
+ i__2 = kk + *n - j;
+ for (k = kk + 1; k <= i__2; ++k) {
+ ix += *incx;
+ iy += *incy;
+ y[iy] += temp1 * ap[k];
+ temp2 += ap[k] * x[ix];
+/* L110: */
+ }
+ y[jy] += *alpha * temp2;
+ jx += *incx;
+ jy += *incy;
+ kk += *n - j + 1;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of DSPMV . */
+
+} /* dspmv_ */
+
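The packed layout described above has a closed-form index, which the kk/k
bookkeeping in the code walks incrementally. A sketch with hypothetical
helpers (0-based indices, not part of this port):

/* Upper packed ('U'): column j holds j+1 entries, so a(i,j) with
   i <= j sits at i + j*(j+1)/2 -- e.g. a(0,0) -> AP[0],
   a(0,1) -> AP[1], a(1,1) -> AP[2], matching the header comment. */
static int upper_packed_index(int i, int j) {
    return i + j * (j + 1) / 2;
}

/* Lower packed ('L'): column j starts after j*n - j*(j-1)/2 entries,
   so a(i,j) with i >= j sits at (i - j) + j*n - j*(j-1)/2. */
static int lower_packed_index(int i, int j, int n) {
    return (i - j) + j * n - j * (j - 1) / 2;
}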
diff --git a/blas/f2c/dtbmv.c b/blas/f2c/dtbmv.c
new file mode 100644
index 000000000..fdf73ebb5
--- /dev/null
+++ b/blas/f2c/dtbmv.c
@@ -0,0 +1,428 @@
+/* dtbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int dtbmv_(char *uplo, char *trans, char *diag, integer *n,
+ integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx,
+ ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+ /* Local variables */
+ integer i__, j, l, ix, jx, kx, info;
+ doublereal temp;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+ logical nounit;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DTBMV performs one of the matrix-vector operations */
+
+/* x := A*x, or x := A'*x, */
+
+/* where x is an n element vector and A is an n by n unit, or non-unit, */
+/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the matrix is an upper or */
+/* lower triangular matrix as follows: */
+
+/* UPLO = 'U' or 'u' A is an upper triangular matrix. */
+
+/* UPLO = 'L' or 'l' A is a lower triangular matrix. */
+
+/* Unchanged on exit. */
+
+/* TRANS - CHARACTER*1. */
+/* On entry, TRANS specifies the operation to be performed as */
+/* follows: */
+
+/* TRANS = 'N' or 'n' x := A*x. */
+
+/* TRANS = 'T' or 't' x := A'*x. */
+
+/* TRANS = 'C' or 'c' x := A'*x. */
+
+/* Unchanged on exit. */
+
+/* DIAG - CHARACTER*1. */
+/* On entry, DIAG specifies whether or not A is unit */
+/* triangular as follows: */
+
+/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */
+
+/* DIAG = 'N' or 'n' A is not assumed to be unit */
+/* triangular. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry with UPLO = 'U' or 'u', K specifies the number of */
+/* super-diagonals of the matrix A. */
+/* On entry with UPLO = 'L' or 'l', K specifies the number of */
+/* sub-diagonals of the matrix A. */
+/* K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer an upper */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer a lower */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Note that when DIAG = 'U' or 'u' the elements of the array A */
+/* corresponding to the diagonal elements of the matrix are not */
+/* referenced, but are assumed to be unity. */
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - DOUBLE PRECISION array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. On exit, X is overwritten with the */
+/* transformed vector x. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans,
+ "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
+ ftnlen)1)) {
+ info = 2;
+ } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag,
+ "N", (ftnlen)1, (ftnlen)1)) {
+ info = 3;
+ } else if (*n < 0) {
+ info = 4;
+ } else if (*k < 0) {
+ info = 5;
+ } else if (*lda < *k + 1) {
+ info = 7;
+ } else if (*incx == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("DTBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+ nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+
+/* Set up the start point in X if the increment is not unity. This */
+/* will be ( N - 1 )*INCX too small for descending loops. */
+
+ if (*incx <= 0) {
+ kx = 1 - (*n - 1) * *incx;
+ } else if (*incx != 1) {
+ kx = 1;
+ }
+
+/* Start the operations. In this version the elements of A are */
+/* accessed sequentially with one pass through A. */
+
+ if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+
+/* Form x := A*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ if (x[j] != 0.) {
+ temp = x[j];
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ x[i__] += temp * a[l + i__ + j * a_dim1];
+/* L10: */
+ }
+ if (nounit) {
+ x[j] *= a[kplus1 + j * a_dim1];
+ }
+ }
+/* L20: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ if (x[jx] != 0.) {
+ temp = x[jx];
+ ix = kx;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ x[ix] += temp * a[l + i__ + j * a_dim1];
+ ix += *incx;
+/* L30: */
+ }
+ if (nounit) {
+ x[jx] *= a[kplus1 + j * a_dim1];
+ }
+ }
+ jx += *incx;
+ if (j > *k) {
+ kx += *incx;
+ }
+/* L40: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ if (x[j] != 0.) {
+ temp = x[j];
+ l = 1 - j;
+/* Computing MIN */
+ i__1 = *n, i__3 = j + *k;
+ i__4 = j + 1;
+ for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
+ x[i__] += temp * a[l + i__ + j * a_dim1];
+/* L50: */
+ }
+ if (nounit) {
+ x[j] *= a[j * a_dim1 + 1];
+ }
+ }
+/* L60: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ if (x[jx] != 0.) {
+ temp = x[jx];
+ ix = kx;
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__1 = j + *k;
+ i__3 = j + 1;
+ for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
+ x[ix] += temp * a[l + i__ + j * a_dim1];
+ ix -= *incx;
+/* L70: */
+ }
+ if (nounit) {
+ x[jx] *= a[j * a_dim1 + 1];
+ }
+ }
+ jx -= *incx;
+ if (*n - j >= *k) {
+ kx -= *incx;
+ }
+/* L80: */
+ }
+ }
+ }
+ } else {
+
+/* Form x := A'*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ temp = x[j];
+ l = kplus1 - j;
+ if (nounit) {
+ temp *= a[kplus1 + j * a_dim1];
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ temp += a[l + i__ + j * a_dim1] * x[i__];
+/* L90: */
+ }
+ x[j] = temp;
+/* L100: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ temp = x[jx];
+ kx -= *incx;
+ ix = kx;
+ l = kplus1 - j;
+ if (nounit) {
+ temp *= a[kplus1 + j * a_dim1];
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ temp += a[l + i__ + j * a_dim1] * x[ix];
+ ix -= *incx;
+/* L110: */
+ }
+ x[jx] = temp;
+ jx -= *incx;
+/* L120: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ temp = x[j];
+ l = 1 - j;
+ if (nounit) {
+ temp *= a[j * a_dim1 + 1];
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ temp += a[l + i__ + j * a_dim1] * x[i__];
+/* L130: */
+ }
+ x[j] = temp;
+/* L140: */
+ }
+ } else {
+ jx = kx;
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ temp = x[jx];
+ kx += *incx;
+ ix = kx;
+ l = 1 - j;
+ if (nounit) {
+ temp *= a[j * a_dim1 + 1];
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ temp += a[l + i__ + j * a_dim1] * x[ix];
+ ix += *incx;
+/* L150: */
+ }
+ x[jx] = temp;
+ jx += *incx;
+/* L160: */
+ }
+ }
+ }
+ }
+
+ return 0;
+
+/* End of DTBMV . */
+
+} /* dtbmv_ */
+
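A minimal call sketch for the x := A*x path with the upper band layout from
the header comment (same datatypes.h assumptions as before; the matrix
values are illustrative):

#include "datatypes.h"

extern int dtbmv_(char *uplo, char *trans, char *diag, integer *n,
                  integer *k, doublereal *a, integer *lda, doublereal *x,
                  integer *incx, ftnlen uplo_len, ftnlen trans_len,
                  ftnlen diag_len);

int main(void) {
    /* 4x4 upper bidiagonal matrix (k = 1 super-diagonal), band-stored
       column by column with the diagonal in 0-based row k = 1:
       diag = 1,2,3,4 and super-diagonal = 5,6,7. Column 0's row-0 slot
       is the unreferenced top-left triangle. */
    integer n = 4, k = 1, lda = 2, incx = 1;
    doublereal a[8] = {0., 1.,  5., 2.,  6., 3.,  7., 4.};
    doublereal x[4] = {1., 1., 1., 1.};

    dtbmv_("U", "N", "N", &n, &k, a, &lda, x, &incx,
           (ftnlen)1, (ftnlen)1, (ftnlen)1);
    /* x is now {6., 8., 10., 4.}: row i picks up diag[i] + super[i]. */
    return 0;
}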
diff --git a/blas/f2c/lsame.c b/blas/f2c/lsame.c
new file mode 100644
index 000000000..46324d916
--- /dev/null
+++ b/blas/f2c/lsame.c
@@ -0,0 +1,117 @@
+/* lsame.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+logical lsame_(char *ca, char *cb, ftnlen ca_len, ftnlen cb_len)
+{
+ /* System generated locals */
+ logical ret_val;
+
+ /* Local variables */
+ integer inta, intb, zcode;
+
+
+/* -- LAPACK auxiliary routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Scalar Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* LSAME returns .TRUE. if CA is the same letter as CB regardless of */
+/* case. */
+
+/* Arguments */
+/* ========= */
+
+/* CA (input) CHARACTER*1 */
+
+/* CB (input) CHARACTER*1 */
+/* CA and CB specify the single characters to be compared. */
+
+/* ===================================================================== */
+
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+
+/* Test if the characters are equal */
+
+ ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+ if (ret_val) {
+ return ret_val;
+ }
+
+/* Now test for equivalence if both characters are alphabetic. */
+
+ zcode = 'Z';
+
+/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */
+/* machines, on which ICHAR returns a value with bit 8 set. */
+/* ICHAR('A') on Prime machines returns 193 which is the same as */
+/* ICHAR('A') on an EBCDIC machine. */
+
+ inta = *(unsigned char *)ca;
+ intb = *(unsigned char *)cb;
+
+ if (zcode == 90 || zcode == 122) {
+
+/* ASCII is assumed - ZCODE is the ASCII code of either lower or */
+/* upper case 'Z'. */
+
+ if (inta >= 97 && inta <= 122) {
+ inta += -32;
+ }
+ if (intb >= 97 && intb <= 122) {
+ intb += -32;
+ }
+
+ } else if (zcode == 233 || zcode == 169) {
+
+/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
+/* upper case 'Z'. */
+
+ if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) ||
+ (inta >= 162 && inta <= 169)) {
+ inta += 64;
+ }
+ if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) ||
+ (intb >= 162 && intb <= 169)) {
+ intb += 64;
+ }
+
+ } else if (zcode == 218 || zcode == 250) {
+
+/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
+/* plus 128 of either lower or upper case 'Z'. */
+
+ if (inta >= 225 && inta <= 250) {
+ inta += -32;
+ }
+ if (intb >= 225 && intb <= 250) {
+ intb += -32;
+ }
+ }
+ ret_val = inta == intb;
+
+/* RETURN */
+
+/* End of LSAME */
+
+ return ret_val;
+} /* lsame_ */
+
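Usage is a single case-insensitive character test; a sketch under the same
datatypes.h assumptions:

#include "datatypes.h"

extern logical lsame_(char *ca, char *cb, ftnlen ca_len, ftnlen cb_len);

int main(void) {
    char lower = 'u', upper = 'U';
    /* Nonzero: 'u' and 'U' compare equal regardless of case. */
    return lsame_(&lower, &upper, (ftnlen)1, (ftnlen)1) ? 0 : 1;
}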
diff --git a/blas/f2c/r_cnjg.c b/blas/f2c/r_cnjg.c
new file mode 100644
index 000000000..c08182f88
--- /dev/null
+++ b/blas/f2c/r_cnjg.c
@@ -0,0 +1,6 @@
+#include "datatypes.h"
+
+void r_cnjg(complex *r, complex *z) {
+ r->r = z->r;
+ r->i = -(z->i);
+}
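The routine stores the single-precision conjugate of *z into *r, following
f2c's convention of returning complex results through the first argument. A
sketch, assuming datatypes.h defines the usual f2c complex struct with
members r and i:

#include "datatypes.h"

extern void r_cnjg(complex *r, complex *z);

int main(void) {
    complex z = {1.f, 2.f}, zbar;
    r_cnjg(&zbar, &z);   /* zbar is now 1 - 2i */
    return 0;
}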
diff --git a/blas/f2c/srotm.c b/blas/f2c/srotm.c
new file mode 100644
index 000000000..bd5944a99
--- /dev/null
+++ b/blas/f2c/srotm.c
@@ -0,0 +1,216 @@
+/* srotm.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int srotm_(integer *n, real *sx, integer *incx, real *sy,
+ integer *incy, real *sparam)
+{
+ /* Initialized data */
+
+ static real zero = 0.f;
+ static real two = 2.f;
+
+ /* System generated locals */
+ integer i__1, i__2;
+
+ /* Local variables */
+ integer i__;
+ real w, z__;
+ integer kx, ky;
+ real sh11, sh12, sh21, sh22, sflag;
+ integer nsteps;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
+
+/* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */
+/* (SY**T) */
+
+/* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
+/* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */
+/* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+
+/* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */
+
+/* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */
+/* H=( ) ( ) ( ) ( ) */
+/* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */
+/* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */
+
+
+/* Arguments */
+/* ========= */
+
+/* N (input) INTEGER */
+/* number of elements in input vector(s) */
+
+/* SX (input/output) REAL array, dimension N */
+/* single precision vector with N elements */
+
+/* INCX (input) INTEGER */
+/* storage spacing between elements of SX */
+
+/* SY (input/output) REAL array, dimension N */
+/* single precision vector with N elements */
+
+/* INCY (input) INTEGER */
+/* storage spacing between elements of SY */
+
+/* SPARAM (input/output) REAL array, dimension 5 */
+/* SPARAM(1)=SFLAG */
+/* SPARAM(2)=SH11 */
+/* SPARAM(3)=SH21 */
+/* SPARAM(4)=SH12 */
+/* SPARAM(5)=SH22 */
+
+/* ===================================================================== */
+
+/* .. Local Scalars .. */
+/* .. */
+/* .. Data statements .. */
+ /* Parameter adjustments */
+ --sparam;
+ --sy;
+ --sx;
+
+ /* Function Body */
+/* .. */
+
+ sflag = sparam[1];
+ if (*n <= 0 || sflag + two == zero) {
+ goto L140;
+ }
+ if (! (*incx == *incy && *incx > 0)) {
+ goto L70;
+ }
+
+ nsteps = *n * *incx;
+ if (sflag < 0.f) {
+ goto L50;
+ } else if (sflag == 0) {
+ goto L10;
+ } else {
+ goto L30;
+ }
+L10:
+ sh12 = sparam[4];
+ sh21 = sparam[3];
+ i__1 = nsteps;
+ i__2 = *incx;
+ for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+ w = sx[i__];
+ z__ = sy[i__];
+ sx[i__] = w + z__ * sh12;
+ sy[i__] = w * sh21 + z__;
+/* L20: */
+ }
+ goto L140;
+L30:
+ sh11 = sparam[2];
+ sh22 = sparam[5];
+ i__2 = nsteps;
+ i__1 = *incx;
+ for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+ w = sx[i__];
+ z__ = sy[i__];
+ sx[i__] = w * sh11 + z__;
+ sy[i__] = -w + sh22 * z__;
+/* L40: */
+ }
+ goto L140;
+L50:
+ sh11 = sparam[2];
+ sh12 = sparam[4];
+ sh21 = sparam[3];
+ sh22 = sparam[5];
+ i__1 = nsteps;
+ i__2 = *incx;
+ for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+ w = sx[i__];
+ z__ = sy[i__];
+ sx[i__] = w * sh11 + z__ * sh12;
+ sy[i__] = w * sh21 + z__ * sh22;
+/* L60: */
+ }
+ goto L140;
+L70:
+ kx = 1;
+ ky = 1;
+ if (*incx < 0) {
+ kx = (1 - *n) * *incx + 1;
+ }
+ if (*incy < 0) {
+ ky = (1 - *n) * *incy + 1;
+ }
+
+ if (sflag < 0.f) {
+ goto L120;
+ } else if (sflag == 0) {
+ goto L80;
+ } else {
+ goto L100;
+ }
+L80:
+ sh12 = sparam[4];
+ sh21 = sparam[3];
+ i__2 = *n;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ w = sx[kx];
+ z__ = sy[ky];
+ sx[kx] = w + z__ * sh12;
+ sy[ky] = w * sh21 + z__;
+ kx += *incx;
+ ky += *incy;
+/* L90: */
+ }
+ goto L140;
+L100:
+ sh11 = sparam[2];
+ sh22 = sparam[5];
+ i__2 = *n;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ w = sx[kx];
+ z__ = sy[ky];
+ sx[kx] = w * sh11 + z__;
+ sy[ky] = -w + sh22 * z__;
+ kx += *incx;
+ ky += *incy;
+/* L110: */
+ }
+ goto L140;
+L120:
+ sh11 = sparam[2];
+ sh12 = sparam[4];
+ sh21 = sparam[3];
+ sh22 = sparam[5];
+ i__2 = *n;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ w = sx[kx];
+ z__ = sy[ky];
+ sx[kx] = w * sh11 + z__ * sh12;
+ sy[ky] = w * sh21 + z__ * sh22;
+ kx += *incx;
+ ky += *incy;
+/* L130: */
+ }
+L140:
+ return 0;
+} /* srotm_ */
+
diff --git a/blas/f2c/srotmg.c b/blas/f2c/srotmg.c
new file mode 100644
index 000000000..75f789fe2
--- /dev/null
+++ b/blas/f2c/srotmg.c
@@ -0,0 +1,295 @@
+/* srotmg.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real
+ *sparam)
+{
+ /* Initialized data */
+
+ static real zero = 0.f;
+ static real one = 1.f;
+ static real two = 2.f;
+ static real gam = 4096.f;
+ static real gamsq = 16777200.f;
+ static real rgamsq = 5.96046e-8f;
+
+ /* Format strings */
+ static char fmt_120[] = "";
+ static char fmt_150[] = "";
+ static char fmt_180[] = "";
+ static char fmt_210[] = "";
+
+ /* System generated locals */
+ real r__1;
+
+ /* Local variables */
+ real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22;
+ integer igo;
+ real sflag, stemp;
+
+ /* Assigned format variables */
+ static char *igo_fmt;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
+/* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* */
+/* SY1)**T. */
+/* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+
+/* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */
+
+/* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */
+/* H=( ) ( ) ( ) ( ) */
+/* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */
+/* LOCATIONS 2-5 OF SPARAM CONTAIN SH11, SH21, SH12, AND SH22 */
+/* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */
+/* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */
+
+/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
+/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
+/* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
+
+
+/* Arguments */
+/* ========= */
+
+
+/* SD1 (input/output) REAL */
+
+/* SD2 (input/output) REAL */
+
+/* SX1 (input/output) REAL */
+
+/* SY1 (input) REAL */
+
+
+/* SPARAM (input/output) REAL array, dimension 5 */
+/* SPARAM(1)=SFLAG */
+/* SPARAM(2)=SH11 */
+/* SPARAM(3)=SH21 */
+/* SPARAM(4)=SH12 */
+/* SPARAM(5)=SH22 */
+
+/* ===================================================================== */
+
+/* .. Local Scalars .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Data statements .. */
+
+ /* Parameter adjustments */
+ --sparam;
+
+ /* Function Body */
+/* .. */
+ if (! (*sd1 < zero)) {
+ goto L10;
+ }
+/* GO ZERO-H-D-AND-SX1.. */
+ goto L60;
+L10:
+/* CASE-SD1-NONNEGATIVE */
+ sp2 = *sd2 * *sy1;
+ if (! (sp2 == zero)) {
+ goto L20;
+ }
+ sflag = -two;
+ goto L260;
+/* REGULAR-CASE.. */
+L20:
+ sp1 = *sd1 * *sx1;
+ sq2 = sp2 * *sy1;
+ sq1 = sp1 * *sx1;
+
+ if (! (dabs(sq1) > dabs(sq2))) {
+ goto L40;
+ }
+ sh21 = -(*sy1) / *sx1;
+ sh12 = sp2 / sp1;
+
+ su = one - sh12 * sh21;
+
+ if (! (su <= zero)) {
+ goto L30;
+ }
+/* GO ZERO-H-D-AND-SX1.. */
+ goto L60;
+L30:
+ sflag = zero;
+ *sd1 /= su;
+ *sd2 /= su;
+ *sx1 *= su;
+/* GO SCALE-CHECK.. */
+ goto L100;
+L40:
+ if (! (sq2 < zero)) {
+ goto L50;
+ }
+/* GO ZERO-H-D-AND-SX1.. */
+ goto L60;
+L50:
+ sflag = one;
+ sh11 = sp1 / sp2;
+ sh22 = *sx1 / *sy1;
+ su = one + sh11 * sh22;
+ stemp = *sd2 / su;
+ *sd2 = *sd1 / su;
+ *sd1 = stemp;
+ *sx1 = *sy1 * su;
+/* GO SCALE-CHECK */
+ goto L100;
+/* PROCEDURE..ZERO-H-D-AND-SX1.. */
+L60:
+ sflag = -one;
+ sh11 = zero;
+ sh12 = zero;
+ sh21 = zero;
+ sh22 = zero;
+
+ *sd1 = zero;
+ *sd2 = zero;
+ *sx1 = zero;
+/* RETURN.. */
+ goto L220;
+/* PROCEDURE..FIX-H.. */
+L70:
+ if (! (sflag >= zero)) {
+ goto L90;
+ }
+
+ if (! (sflag == zero)) {
+ goto L80;
+ }
+ sh11 = one;
+ sh22 = one;
+ sflag = -one;
+ goto L90;
+L80:
+ sh21 = -one;
+ sh12 = one;
+ sflag = -one;
+L90:
+ switch (igo) {
+ case 0: goto L120;
+ case 1: goto L150;
+ case 2: goto L180;
+ case 3: goto L210;
+ }
+/* PROCEDURE..SCALE-CHECK */
+L100:
+L110:
+ if (! (*sd1 <= rgamsq)) {
+ goto L130;
+ }
+ if (*sd1 == zero) {
+ goto L160;
+ }
+ igo = 0;
+ igo_fmt = fmt_120;
+/* FIX-H.. */
+ goto L70;
+L120:
+/* Computing 2nd power */
+ r__1 = gam;
+ *sd1 *= r__1 * r__1;
+ *sx1 /= gam;
+ sh11 /= gam;
+ sh12 /= gam;
+ goto L110;
+L130:
+L140:
+ if (! (*sd1 >= gamsq)) {
+ goto L160;
+ }
+ igo = 1;
+ igo_fmt = fmt_150;
+/* FIX-H.. */
+ goto L70;
+L150:
+/* Computing 2nd power */
+ r__1 = gam;
+ *sd1 /= r__1 * r__1;
+ *sx1 *= gam;
+ sh11 *= gam;
+ sh12 *= gam;
+ goto L140;
+L160:
+L170:
+ if (! (dabs(*sd2) <= rgamsq)) {
+ goto L190;
+ }
+ if (*sd2 == zero) {
+ goto L220;
+ }
+ igo = 2;
+ igo_fmt = fmt_180;
+/* FIX-H.. */
+ goto L70;
+L180:
+/* Computing 2nd power */
+ r__1 = gam;
+ *sd2 *= r__1 * r__1;
+ sh21 /= gam;
+ sh22 /= gam;
+ goto L170;
+L190:
+L200:
+ if (! (dabs(*sd2) >= gamsq)) {
+ goto L220;
+ }
+ igo = 3;
+ igo_fmt = fmt_210;
+/* FIX-H.. */
+ goto L70;
+L210:
+/* Computing 2nd power */
+ r__1 = gam;
+ *sd2 /= r__1 * r__1;
+ sh21 *= gam;
+ sh22 *= gam;
+ goto L200;
+L220:
+ if (sflag < 0.f) {
+ goto L250;
+ } else if (sflag == 0) {
+ goto L230;
+ } else {
+ goto L240;
+ }
+L230:
+ sparam[3] = sh21;
+ sparam[4] = sh12;
+ goto L260;
+L240:
+ sparam[2] = sh11;
+ sparam[5] = sh22;
+ goto L260;
+L250:
+ sparam[2] = sh11;
+ sparam[3] = sh21;
+ sparam[4] = sh12;
+ sparam[5] = sh22;
+L260:
+ sparam[1] = sflag;
+ return 0;
+} /* srotmg_ */
+
diff --git a/blas/f2c/ssbmv.c b/blas/f2c/ssbmv.c
new file mode 100644
index 000000000..8599325f2
--- /dev/null
+++ b/blas/f2c/ssbmv.c
@@ -0,0 +1,368 @@
+/* ssbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int ssbmv_(char *uplo, integer *n, integer *k, real *alpha,
+ real *a, integer *lda, real *x, integer *incx, real *beta, real *y,
+ integer *incy, ftnlen uplo_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+ /* Local variables */
+ integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+ real temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* SSBMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n symmetric band matrix, with k super-diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the band matrix A is being supplied as */
+/* follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* being supplied. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* being supplied. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry, K specifies the number of super-diagonals of the */
+/* matrix A. K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* ALPHA - REAL . */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* A - REAL array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the symmetric matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer the upper */
+/* triangular part of a symmetric band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the symmetric matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer the lower */
+/* triangular part of a symmetric band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - REAL array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the */
+/* vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - REAL . */
+/* On entry, BETA specifies the scalar beta. */
+/* Unchanged on exit. */
+
+/* Y - REAL array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the */
+/* vector y. On exit, Y is overwritten by the updated vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+ --y;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*k < 0) {
+ info = 3;
+ } else if (*lda < *k + 1) {
+ info = 6;
+ } else if (*incx == 0) {
+ info = 8;
+ } else if (*incy == 0) {
+ info = 11;
+ }
+ if (info != 0) {
+ xerbla_("SSBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array A */
+/* are accessed sequentially with one pass through A. */
+
+/* First form y := beta*y. */
+
+ if (*beta != 1.f) {
+ if (*incy == 1) {
+ if (*beta == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = 0.f;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = *beta * y[i__];
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (*beta == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = 0.f;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = *beta * y[iy];
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (*alpha == 0.f) {
+ return 0;
+ }
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when upper triangle of A is stored. */
+
+ kplus1 = *k + 1;
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.f;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ y[i__] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[i__];
+/* L50: */
+ }
+ y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.f;
+ ix = kx;
+ iy = ky;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ y[iy] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[ix];
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha *
+ temp2;
+ jx += *incx;
+ jy += *incy;
+ if (j > *k) {
+ kx += *incx;
+ ky += *incy;
+ }
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when lower triangle of A is stored. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.f;
+ y[j] += temp1 * a[j * a_dim1 + 1];
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ y[i__] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[i__];
+/* L90: */
+ }
+ y[j] += *alpha * temp2;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.f;
+ y[jy] += temp1 * a[j * a_dim1 + 1];
+ l = 1 - j;
+ ix = jx;
+ iy = jy;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ ix += *incx;
+ iy += *incy;
+ y[iy] += temp1 * a[l + i__ + j * a_dim1];
+ temp2 += a[l + i__ + j * a_dim1] * x[ix];
+/* L110: */
+ }
+ y[jy] += *alpha * temp2;
+ jx += *incx;
+ jy += *incy;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of SSBMV . */
+
+} /* ssbmv_ */
+
diff --git a/blas/f2c/sspmv.c b/blas/f2c/sspmv.c
new file mode 100644
index 000000000..47858ec6c
--- /dev/null
+++ b/blas/f2c/sspmv.c
@@ -0,0 +1,316 @@
+/* sspmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int sspmv_(char *uplo, integer *n, real *alpha, real *ap,
+ real *x, integer *incx, real *beta, real *y, integer *incy, ftnlen
+ uplo_len)
+{
+ /* System generated locals */
+ integer i__1, i__2;
+
+ /* Local variables */
+ integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+ real temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* SSPMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n symmetric matrix, supplied in packed form. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the matrix A is supplied in the packed */
+/* array AP as follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* supplied in AP. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* supplied in AP. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* ALPHA - REAL . */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* AP - REAL array of DIMENSION at least */
+/* ( ( n*( n + 1 ) )/2 ). */
+/* Before entry with UPLO = 'U' or 'u', the array AP must */
+/* contain the upper triangular part of the symmetric matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+/* and a( 2, 2 ) respectively, and so on. */
+/* Before entry with UPLO = 'L' or 'l', the array AP must */
+/* contain the lower triangular part of the symmetric matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+/* and a( 3, 1 ) respectively, and so on. */
+/* Unchanged on exit. */
+
+/* X - REAL array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - REAL . */
+/* On entry, BETA specifies the scalar beta. When BETA is */
+/* supplied as zero then Y need not be set on input. */
+/* Unchanged on exit. */
+
+/* Y - REAL array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the n */
+/* element vector y. On exit, Y is overwritten by the updated */
+/* vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ --y;
+ --x;
+ --ap;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*incx == 0) {
+ info = 6;
+ } else if (*incy == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("SSPMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array AP */
+/* are accessed sequentially with one pass through AP. */
+
+/* First form y := beta*y. */
+
+ if (*beta != 1.f) {
+ if (*incy == 1) {
+ if (*beta == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = 0.f;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[i__] = *beta * y[i__];
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (*beta == 0.f) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = 0.f;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ y[iy] = *beta * y[iy];
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (*alpha == 0.f) {
+ return 0;
+ }
+ kk = 1;
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when AP contains the upper triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.f;
+ k = kk;
+ i__2 = j - 1;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ y[i__] += temp1 * ap[k];
+ temp2 += ap[k] * x[i__];
+ ++k;
+/* L50: */
+ }
+ y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+ kk += j;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.f;
+ ix = kx;
+ iy = ky;
+ i__2 = kk + j - 2;
+ for (k = kk; k <= i__2; ++k) {
+ y[iy] += temp1 * ap[k];
+ temp2 += ap[k] * x[ix];
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+ jx += *incx;
+ jy += *incy;
+ kk += j;
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when AP contains the lower triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[j];
+ temp2 = 0.f;
+ y[j] += temp1 * ap[kk];
+ k = kk + 1;
+ i__2 = *n;
+ for (i__ = j + 1; i__ <= i__2; ++i__) {
+ y[i__] += temp1 * ap[k];
+ temp2 += ap[k] * x[i__];
+ ++k;
+/* L90: */
+ }
+ y[j] += *alpha * temp2;
+ kk += *n - j + 1;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ temp1 = *alpha * x[jx];
+ temp2 = 0.f;
+ y[jy] += temp1 * ap[kk];
+ ix = jx;
+ iy = jy;
+ i__2 = kk + *n - j;
+ for (k = kk + 1; k <= i__2; ++k) {
+ ix += *incx;
+ iy += *incy;
+ y[iy] += temp1 * ap[k];
+ temp2 += ap[k] * x[ix];
+/* L110: */
+ }
+ y[jy] += *alpha * temp2;
+ jx += *incx;
+ jy += *incy;
+ kk += *n - j + 1;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of SSPMV . */
+
+} /* sspmv_ */
+
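The kk/k bookkeeping above traverses the packed layout directly: for UPLO = 'U', entry a(i,j) with i <= j is stored at AP(i + (j-1)*j/2) (1-based), and kk += j advances kk to the start of column j+1. A usage sketch under the same assumptions as the ssbmv example:

#include <stdio.h>
#include "datatypes.h"

extern int sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x,
                  integer *incx, real *beta, real *y, integer *incy,
                  ftnlen uplo_len);

int main(void) {
    integer n = 3, inc = 1;
    real alpha = 1.f, beta = 0.f;
    /* Upper triangle of [[2,1,0],[1,2,1],[0,1,2]], packed column by
       column: a11, a12, a22, a13, a23, a33. */
    real ap[6] = { 2.f, 1.f, 2.f, 0.f, 1.f, 2.f };
    real x[3] = { 1.f, 1.f, 1.f }, y[3];
    sspmv_("U", &n, &alpha, ap, x, &inc, &beta, y, &inc, (ftnlen)1);
    printf("%g %g %g\n", y[0], y[1], y[2]);  /* prints 3 4 3 */
    return 0;
}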
diff --git a/blas/f2c/stbmv.c b/blas/f2c/stbmv.c
new file mode 100644
index 000000000..fcf9ce336
--- /dev/null
+++ b/blas/f2c/stbmv.c
@@ -0,0 +1,428 @@
+/* stbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n,
+ integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen
+ uplo_len, ftnlen trans_len, ftnlen diag_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+ /* Local variables */
+ integer i__, j, l, ix, jx, kx, info;
+ real temp;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+ logical nounit;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* STBMV performs one of the matrix-vector operations */
+
+/* x := A*x, or x := A'*x, */
+
+/* where x is an n element vector and A is an n by n unit, or non-unit, */
+/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the matrix is an upper or */
+/* lower triangular matrix as follows: */
+
+/* UPLO = 'U' or 'u' A is an upper triangular matrix. */
+
+/* UPLO = 'L' or 'l' A is a lower triangular matrix. */
+
+/* Unchanged on exit. */
+
+/* TRANS - CHARACTER*1. */
+/* On entry, TRANS specifies the operation to be performed as */
+/* follows: */
+
+/* TRANS = 'N' or 'n' x := A*x. */
+
+/* TRANS = 'T' or 't' x := A'*x. */
+
+/* TRANS = 'C' or 'c' x := A'*x. */
+
+/* Unchanged on exit. */
+
+/* DIAG - CHARACTER*1. */
+/* On entry, DIAG specifies whether or not A is unit */
+/* triangular as follows: */
+
+/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */
+
+/* DIAG = 'N' or 'n' A is not assumed to be unit */
+/* triangular. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry with UPLO = 'U' or 'u', K specifies the number of */
+/* super-diagonals of the matrix A. */
+/* On entry with UPLO = 'L' or 'l', K specifies the number of */
+/* sub-diagonals of the matrix A. */
+/* K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* A - REAL array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer an upper */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer a lower */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Note that when DIAG = 'U' or 'u' the elements of the array A */
+/* corresponding to the diagonal elements of the matrix are not */
+/* referenced, but are assumed to be unity. */
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - REAL array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. On exit, X is overwritten with the */
+/* transformed vector x. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans,
+ "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
+ ftnlen)1)) {
+ info = 2;
+ } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag,
+ "N", (ftnlen)1, (ftnlen)1)) {
+ info = 3;
+ } else if (*n < 0) {
+ info = 4;
+ } else if (*k < 0) {
+ info = 5;
+ } else if (*lda < *k + 1) {
+ info = 7;
+ } else if (*incx == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("STBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+ nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+
+/* Set up the start point in X if the increment is not unity. This */
+/* will be ( N - 1 )*INCX too small for descending loops. */
+
+ if (*incx <= 0) {
+ kx = 1 - (*n - 1) * *incx;
+ } else if (*incx != 1) {
+ kx = 1;
+ }
+
+/* Start the operations. In this version the elements of A are */
+/* accessed sequentially with one pass through A. */
+
+ if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+
+/* Form x := A*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ if (x[j] != 0.f) {
+ temp = x[j];
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ x[i__] += temp * a[l + i__ + j * a_dim1];
+/* L10: */
+ }
+ if (nounit) {
+ x[j] *= a[kplus1 + j * a_dim1];
+ }
+ }
+/* L20: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ if (x[jx] != 0.f) {
+ temp = x[jx];
+ ix = kx;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ x[ix] += temp * a[l + i__ + j * a_dim1];
+ ix += *incx;
+/* L30: */
+ }
+ if (nounit) {
+ x[jx] *= a[kplus1 + j * a_dim1];
+ }
+ }
+ jx += *incx;
+ if (j > *k) {
+ kx += *incx;
+ }
+/* L40: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ if (x[j] != 0.f) {
+ temp = x[j];
+ l = 1 - j;
+/* Computing MIN */
+ i__1 = *n, i__3 = j + *k;
+ i__4 = j + 1;
+ for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
+ x[i__] += temp * a[l + i__ + j * a_dim1];
+/* L50: */
+ }
+ if (nounit) {
+ x[j] *= a[j * a_dim1 + 1];
+ }
+ }
+/* L60: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ if (x[jx] != 0.f) {
+ temp = x[jx];
+ ix = kx;
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__1 = j + *k;
+ i__3 = j + 1;
+ for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
+ x[ix] += temp * a[l + i__ + j * a_dim1];
+ ix -= *incx;
+/* L70: */
+ }
+ if (nounit) {
+ x[jx] *= a[j * a_dim1 + 1];
+ }
+ }
+ jx -= *incx;
+ if (*n - j >= *k) {
+ kx -= *incx;
+ }
+/* L80: */
+ }
+ }
+ }
+ } else {
+
+/* Form x := A'*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ temp = x[j];
+ l = kplus1 - j;
+ if (nounit) {
+ temp *= a[kplus1 + j * a_dim1];
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ temp += a[l + i__ + j * a_dim1] * x[i__];
+/* L90: */
+ }
+ x[j] = temp;
+/* L100: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ temp = x[jx];
+ kx -= *incx;
+ ix = kx;
+ l = kplus1 - j;
+ if (nounit) {
+ temp *= a[kplus1 + j * a_dim1];
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ temp += a[l + i__ + j * a_dim1] * x[ix];
+ ix -= *incx;
+/* L110: */
+ }
+ x[jx] = temp;
+ jx -= *incx;
+/* L120: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ temp = x[j];
+ l = 1 - j;
+ if (nounit) {
+ temp *= a[j * a_dim1 + 1];
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ temp += a[l + i__ + j * a_dim1] * x[i__];
+/* L130: */
+ }
+ x[j] = temp;
+/* L140: */
+ }
+ } else {
+ jx = kx;
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ temp = x[jx];
+ kx += *incx;
+ ix = kx;
+ l = 1 - j;
+ if (nounit) {
+ temp *= a[j * a_dim1 + 1];
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ temp += a[l + i__ + j * a_dim1] * x[ix];
+ ix += *incx;
+/* L150: */
+ }
+ x[jx] = temp;
+ jx += *incx;
+/* L160: */
+ }
+ }
+ }
+ }
+
+ return 0;
+
+/* End of STBMV . */
+
+} /* stbmv_ */
+
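Unlike ssbmv/sspmv, stbmv_ works in place: x is overwritten with A*x (or A'*x). A sketch of the non-transposed, non-unit upper case, reusing the band layout from the header comment; assumptions as above:

#include <stdio.h>
#include "datatypes.h"

extern int stbmv_(char *uplo, char *trans, char *diag, integer *n,
                  integer *k, real *a, integer *lda, real *x, integer *incx,
                  ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len);

int main(void) {
    integer n = 3, k = 1, lda = 2, inc = 1;
    /* Upper triangular band matrix [[1,1,0],[0,1,1],[0,0,1]]:
       diagonal in row k+1, super-diagonal in row k. */
    real a[6] = { 0.f, 1.f, 1.f, 1.f, 1.f, 1.f };
    real x[3] = { 1.f, 2.f, 3.f };
    stbmv_("U", "N", "N", &n, &k, a, &lda, x, &inc,
           (ftnlen)1, (ftnlen)1, (ftnlen)1);
    printf("%g %g %g\n", x[0], x[1], x[2]);  /* prints 3 5 3 = A*x */
    return 0;
}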
diff --git a/blas/f2c/zhbmv.c b/blas/f2c/zhbmv.c
new file mode 100644
index 000000000..42da13dbb
--- /dev/null
+++ b/blas/f2c/zhbmv.c
@@ -0,0 +1,488 @@
+/* zhbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex
+ *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *
+ incx, doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen
+ uplo_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+ doublereal d__1;
+ doublecomplex z__1, z__2, z__3, z__4;
+
+ /* Builtin functions */
+ void d_cnjg(doublecomplex *, doublecomplex *);
+
+ /* Local variables */
+ integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+ doublecomplex temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* ZHBMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n hermitian band matrix, with k super-diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the band matrix A is being supplied as */
+/* follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* being supplied. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* being supplied. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry, K specifies the number of super-diagonals of the */
+/* matrix A. K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* ALPHA - COMPLEX*16 . */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the hermitian matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer the upper */
+/* triangular part of a hermitian band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the hermitian matrix, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer the lower */
+/* triangular part of a hermitian band matrix from conventional */
+/* full matrix storage to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Note that the imaginary parts of the diagonal elements need */
+/* not be set and are assumed to be zero. */
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - COMPLEX*16 array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the */
+/* vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - COMPLEX*16 . */
+/* On entry, BETA specifies the scalar beta. */
+/* Unchanged on exit. */
+
+/* Y - COMPLEX*16 array of DIMENSION at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the */
+/* vector y. On exit, Y is overwritten by the updated vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+ --y;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*k < 0) {
+ info = 3;
+ } else if (*lda < *k + 1) {
+ info = 6;
+ } else if (*incx == 0) {
+ info = 8;
+ } else if (*incy == 0) {
+ info = 11;
+ }
+ if (info != 0) {
+ xerbla_("ZHBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. &&
+ beta->i == 0.))) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array A */
+/* are accessed sequentially with one pass through A. */
+
+/* First form y := beta*y. */
+
+ if (beta->r != 1. || beta->i != 0.) {
+ if (*incy == 1) {
+ if (beta->r == 0. && beta->i == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ y[i__2].r = 0., y[i__2].i = 0.;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (beta->r == 0. && beta->i == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ y[i__2].r = 0., y[i__2].i = 0.;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ i__3 = iy;
+ z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (alpha->r == 0. && alpha->i == 0.) {
+ return 0;
+ }
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when upper triangle of A is stored. */
+
+ kplus1 = *k + 1;
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ i__5 = l + i__ + j * a_dim1;
+ z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__2 = i__;
+ z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i =
+ z__3.r * x[i__2].i + z__3.i * x[i__2].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+/* L50: */
+ }
+ i__4 = j;
+ i__2 = j;
+ i__3 = kplus1 + j * a_dim1;
+ d__1 = a[i__3].r;
+ z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+ z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i;
+ z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+ y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__4 = jx;
+ z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i =
+ alpha->r * x[i__4].i + alpha->i * x[i__4].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ ix = kx;
+ iy = ky;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ i__4 = iy;
+ i__2 = iy;
+ i__5 = l + i__ + j * a_dim1;
+ z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
+ y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__4 = ix;
+ z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
+ z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ i__3 = jy;
+ i__4 = jy;
+ i__2 = kplus1 + j * a_dim1;
+ d__1 = a[i__2].r;
+ z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+ z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i;
+ z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ jx += *incx;
+ jy += *incy;
+ if (j > *k) {
+ kx += *incx;
+ ky += *incy;
+ }
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when lower triangle of A is stored. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = j;
+ z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
+ alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ i__3 = j;
+ i__4 = j;
+ i__2 = j * a_dim1 + 1;
+ d__1 = a[i__2].r;
+ z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ i__4 = i__;
+ i__2 = i__;
+ i__5 = l + i__ + j * a_dim1;
+ z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
+ y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__4 = i__;
+ z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
+ z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+/* L90: */
+ }
+ i__3 = j;
+ i__4 = j;
+ z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = jx;
+ z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
+ alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ i__3 = jy;
+ i__4 = jy;
+ i__2 = j * a_dim1 + 1;
+ d__1 = a[i__2].r;
+ z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ l = 1 - j;
+ ix = jx;
+ iy = jy;
+/* Computing MIN */
+ i__4 = *n, i__2 = j + *k;
+ i__3 = min(i__4,i__2);
+ for (i__ = j + 1; i__ <= i__3; ++i__) {
+ ix += *incx;
+ iy += *incy;
+ i__4 = iy;
+ i__2 = iy;
+ i__5 = l + i__ + j * a_dim1;
+ z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+ z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+ .r;
+ z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
+ y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__4 = ix;
+ z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
+ z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+/* L110: */
+ }
+ i__3 = jy;
+ i__4 = jy;
+ z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ jx += *incx;
+ jy += *incy;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of ZHBMV . */
+
+} /* zhbmv_ */
+
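zhbmv_ is the hermitian-band analogue of ssbmv_: only one triangle is supplied, the other half is reconstructed by conjugation (the d_cnjg calls above), and the imaginary parts of the diagonal are assumed zero. A sketch, assuming "datatypes.h" defines doublecomplex as the usual f2c struct with .r/.i members:

#include <stdio.h>
#include "datatypes.h"

extern int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha,
                  doublecomplex *a, integer *lda, doublecomplex *x,
                  integer *incx, doublecomplex *beta, doublecomplex *y,
                  integer *incy, ftnlen uplo_len);

int main(void) {
    integer n = 2, k = 1, lda = 2, inc = 1;
    doublecomplex alpha = {1., 0.}, beta = {0., 0.};
    /* Hermitian matrix [[2, i], [-i, 2]] in upper band storage:
       column 1 is (unused, a11), column 2 is (a12, a22). */
    doublecomplex a[4] = { {0.,0.}, {2.,0.}, {0.,1.}, {2.,0.} };
    doublecomplex x[2] = { {1.,0.}, {1.,0.} }, y[2];
    zhbmv_("U", &n, &k, &alpha, a, &lda, x, &inc, &beta, y, &inc, (ftnlen)1);
    printf("(%g,%g) (%g,%g)\n", y[0].r, y[0].i, y[1].r, y[1].i);
    /* prints (2,1) (2,-1), i.e. y = (2+i, 2-i) */
    return 0;
}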
diff --git a/blas/f2c/zhpmv.c b/blas/f2c/zhpmv.c
new file mode 100644
index 000000000..fbe2f42b3
--- /dev/null
+++ b/blas/f2c/zhpmv.c
@@ -0,0 +1,438 @@
+/* zhpmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int zhpmv_(char *uplo, integer *n, doublecomplex *alpha,
+ doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex *
+ beta, doublecomplex *y, integer *incy, ftnlen uplo_len)
+{
+ /* System generated locals */
+ integer i__1, i__2, i__3, i__4, i__5;
+ doublereal d__1;
+ doublecomplex z__1, z__2, z__3, z__4;
+
+ /* Builtin functions */
+ void d_cnjg(doublecomplex *, doublecomplex *);
+
+ /* Local variables */
+ integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+ doublecomplex temp1, temp2;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* ZHPMV performs the matrix-vector operation */
+
+/* y := alpha*A*x + beta*y, */
+
+/* where alpha and beta are scalars, x and y are n element vectors and */
+/* A is an n by n hermitian matrix, supplied in packed form. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the upper or lower */
+/* triangular part of the matrix A is supplied in the packed */
+/* array AP as follows: */
+
+/* UPLO = 'U' or 'u' The upper triangular part of A is */
+/* supplied in AP. */
+
+/* UPLO = 'L' or 'l' The lower triangular part of A is */
+/* supplied in AP. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* ALPHA - COMPLEX*16 . */
+/* On entry, ALPHA specifies the scalar alpha. */
+/* Unchanged on exit. */
+
+/* AP - COMPLEX*16 array of DIMENSION at least */
+/* ( ( n*( n + 1 ) )/2 ). */
+/* Before entry with UPLO = 'U' or 'u', the array AP must */
+/* contain the upper triangular part of the hermitian matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+/* and a( 2, 2 ) respectively, and so on. */
+/* Before entry with UPLO = 'L' or 'l', the array AP must */
+/* contain the lower triangular part of the hermitian matrix */
+/* packed sequentially, column by column, so that AP( 1 ) */
+/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+/* and a( 3, 1 ) respectively, and so on. */
+/* Note that the imaginary parts of the diagonal elements need */
+/* not be set and are assumed to be zero. */
+/* Unchanged on exit. */
+
+/* X - COMPLEX*16 array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. */
+/* Unchanged on exit. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* BETA - COMPLEX*16 . */
+/* On entry, BETA specifies the scalar beta. When BETA is */
+/* supplied as zero then Y need not be set on input. */
+/* Unchanged on exit. */
+
+/* Y - COMPLEX*16 array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCY ) ). */
+/* Before entry, the incremented array Y must contain the n */
+/* element vector y. On exit, Y is overwritten by the updated */
+/* vector y. */
+
+/* INCY - INTEGER. */
+/* On entry, INCY specifies the increment for the elements of */
+/* Y. INCY must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ --y;
+ --x;
+ --ap;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (*n < 0) {
+ info = 2;
+ } else if (*incx == 0) {
+ info = 6;
+ } else if (*incy == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("ZHPMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. &&
+ beta->i == 0.))) {
+ return 0;
+ }
+
+/* Set up the start points in X and Y. */
+
+ if (*incx > 0) {
+ kx = 1;
+ } else {
+ kx = 1 - (*n - 1) * *incx;
+ }
+ if (*incy > 0) {
+ ky = 1;
+ } else {
+ ky = 1 - (*n - 1) * *incy;
+ }
+
+/* Start the operations. In this version the elements of the array AP */
+/* are accessed sequentially with one pass through AP. */
+
+/* First form y := beta*y. */
+
+ if (beta->r != 1. || beta->i != 0.) {
+ if (*incy == 1) {
+ if (beta->r == 0. && beta->i == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ y[i__2].r = 0., y[i__2].i = 0.;
+/* L10: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+/* L20: */
+ }
+ }
+ } else {
+ iy = ky;
+ if (beta->r == 0. && beta->i == 0.) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ y[i__2].r = 0., y[i__2].i = 0.;
+ iy += *incy;
+/* L30: */
+ }
+ } else {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ i__2 = iy;
+ i__3 = iy;
+ z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+ z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+ .r;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ iy += *incy;
+/* L40: */
+ }
+ }
+ }
+ }
+ if (alpha->r == 0. && alpha->i == 0.) {
+ return 0;
+ }
+ kk = 1;
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+
+/* Form y when AP contains the upper triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ k = kk;
+ i__2 = j - 1;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ i__3 = i__;
+ i__4 = i__;
+ i__5 = k;
+ z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ d_cnjg(&z__3, &ap[k]);
+ i__3 = i__;
+ z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+ z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+ ++k;
+/* L50: */
+ }
+ i__2 = j;
+ i__3 = j;
+ i__4 = kk + j - 1;
+ d__1 = ap[i__4].r;
+ z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+ z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
+ z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ kk += j;
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = jx;
+ z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ ix = kx;
+ iy = ky;
+ i__2 = kk + j - 2;
+ for (k = kk; k <= i__2; ++k) {
+ i__3 = iy;
+ i__4 = iy;
+ i__5 = k;
+ z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ d_cnjg(&z__3, &ap[k]);
+ i__3 = ix;
+ z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+ z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+ ix += *incx;
+ iy += *incy;
+/* L70: */
+ }
+ i__2 = jy;
+ i__3 = jy;
+ i__4 = kk + j - 1;
+ d__1 = ap[i__4].r;
+ z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+ z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
+ z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ jx += *incx;
+ jy += *incy;
+ kk += j;
+/* L80: */
+ }
+ }
+ } else {
+
+/* Form y when AP contains the lower triangle. */
+
+ if (*incx == 1 && *incy == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ i__2 = j;
+ i__3 = j;
+ i__4 = kk;
+ d__1 = ap[i__4].r;
+ z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+ z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ k = kk + 1;
+ i__2 = *n;
+ for (i__ = j + 1; i__ <= i__2; ++i__) {
+ i__3 = i__;
+ i__4 = i__;
+ i__5 = k;
+ z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ d_cnjg(&z__3, &ap[k]);
+ i__3 = i__;
+ z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+ z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+ ++k;
+/* L90: */
+ }
+ i__2 = j;
+ i__3 = j;
+ z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ kk += *n - j + 1;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ jy = ky;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = jx;
+ z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+ alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+ temp1.r = z__1.r, temp1.i = z__1.i;
+ temp2.r = 0., temp2.i = 0.;
+ i__2 = jy;
+ i__3 = jy;
+ i__4 = kk;
+ d__1 = ap[i__4].r;
+ z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+ z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ ix = jx;
+ iy = jy;
+ i__2 = kk + *n - j;
+ for (k = kk + 1; k <= i__2; ++k) {
+ ix += *incx;
+ iy += *incy;
+ i__3 = iy;
+ i__4 = iy;
+ i__5 = k;
+ z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i,
+ z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
+ .r;
+ z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+ y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+ d_cnjg(&z__3, &ap[k]);
+ i__3 = ix;
+ z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+ z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+ z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+ temp2.r = z__1.r, temp2.i = z__1.i;
+/* L110: */
+ }
+ i__2 = jy;
+ i__3 = jy;
+ z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i =
+ alpha->r * temp2.i + alpha->i * temp2.r;
+ z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+ y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+ jx += *incx;
+ jy += *incy;
+ kk += *n - j + 1;
+/* L120: */
+ }
+ }
+ }
+
+ return 0;
+
+/* End of ZHPMV . */
+
+} /* zhpmv_ */
+
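zhpmv_ combines the packed indexing of sspmv_ with the conjugate handling of zhbmv_. The same 2-by-2 hermitian example, now in packed form (assumptions as above):

#include <stdio.h>
#include "datatypes.h"

extern int zhpmv_(char *uplo, integer *n, doublecomplex *alpha,
                  doublecomplex *ap, doublecomplex *x, integer *incx,
                  doublecomplex *beta, doublecomplex *y, integer *incy,
                  ftnlen uplo_len);

int main(void) {
    integer n = 2, inc = 1;
    doublecomplex alpha = {1., 0.}, beta = {0., 0.};
    /* Upper triangle of [[2, i], [-i, 2]] packed column by column:
       a11, a12, a22. */
    doublecomplex ap[3] = { {2.,0.}, {0.,1.}, {2.,0.} };
    doublecomplex x[2] = { {1.,0.}, {1.,0.} }, y[2];
    zhpmv_("U", &n, &alpha, ap, x, &inc, &beta, y, &inc, (ftnlen)1);
    printf("(%g,%g) (%g,%g)\n", y[0].r, y[0].i, y[1].r, y[1].i);
    /* prints (2,1) (2,-1) */
    return 0;
}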
diff --git a/blas/f2c/ztbmv.c b/blas/f2c/ztbmv.c
new file mode 100644
index 000000000..4cdcd7f88
--- /dev/null
+++ b/blas/f2c/ztbmv.c
@@ -0,0 +1,647 @@
+/* ztbmv.f -- translated by f2c (version 20100827).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows systems, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "datatypes.h"
+
+/* Subroutine */ int ztbmv_(char *uplo, char *trans, char *diag, integer *n,
+ integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer
+ *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+ doublecomplex z__1, z__2, z__3;
+
+ /* Builtin functions */
+ void d_cnjg(doublecomplex *, doublecomplex *);
+
+ /* Local variables */
+ integer i__, j, l, ix, jx, kx, info;
+ doublecomplex temp;
+ extern logical lsame_(char *, char *, ftnlen, ftnlen);
+ integer kplus1;
+ extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+ logical noconj, nounit;
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* ZTBMV performs one of the matrix-vector operations */
+
+/* x := A*x, or x := A'*x, or x := conjg( A' )*x, */
+
+/* where x is an n element vector and A is an n by n unit, or non-unit, */
+/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+
+/* Arguments */
+/* ========== */
+
+/* UPLO - CHARACTER*1. */
+/* On entry, UPLO specifies whether the matrix is an upper or */
+/* lower triangular matrix as follows: */
+
+/* UPLO = 'U' or 'u' A is an upper triangular matrix. */
+
+/* UPLO = 'L' or 'l' A is a lower triangular matrix. */
+
+/* Unchanged on exit. */
+
+/* TRANS - CHARACTER*1. */
+/* On entry, TRANS specifies the operation to be performed as */
+/* follows: */
+
+/* TRANS = 'N' or 'n' x := A*x. */
+
+/* TRANS = 'T' or 't' x := A'*x. */
+
+/* TRANS = 'C' or 'c' x := conjg( A' )*x. */
+
+/* Unchanged on exit. */
+
+/* DIAG - CHARACTER*1. */
+/* On entry, DIAG specifies whether or not A is unit */
+/* triangular as follows: */
+
+/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */
+
+/* DIAG = 'N' or 'n' A is not assumed to be unit */
+/* triangular. */
+
+/* Unchanged on exit. */
+
+/* N - INTEGER. */
+/* On entry, N specifies the order of the matrix A. */
+/* N must be at least zero. */
+/* Unchanged on exit. */
+
+/* K - INTEGER. */
+/* On entry with UPLO = 'U' or 'u', K specifies the number of */
+/* super-diagonals of the matrix A. */
+/* On entry with UPLO = 'L' or 'l', K specifies the number of */
+/* sub-diagonals of the matrix A. */
+/* K must satisfy 0 .le. K. */
+/* Unchanged on exit. */
+
+/* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */
+/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+/* by n part of the array A must contain the upper triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row */
+/* ( k + 1 ) of the array, the first super-diagonal starting at */
+/* position 2 in row k, and so on. The top left k by k triangle */
+/* of the array A is not referenced. */
+/* The following program segment will transfer an upper */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = K + 1 - J */
+/* DO 10, I = MAX( 1, J - K ), J */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+/* by n part of the array A must contain the lower triangular */
+/* band part of the matrix of coefficients, supplied column by */
+/* column, with the leading diagonal of the matrix in row 1 of */
+/* the array, the first sub-diagonal starting at position 1 in */
+/* row 2, and so on. The bottom right k by k triangle of the */
+/* array A is not referenced. */
+/* The following program segment will transfer a lower */
+/* triangular band matrix from conventional full matrix storage */
+/* to band storage: */
+
+/* DO 20, J = 1, N */
+/* M = 1 - J */
+/* DO 10, I = J, MIN( N, J + K ) */
+/* A( M + I, J ) = matrix( I, J ) */
+/* 10 CONTINUE */
+/* 20 CONTINUE */
+
+/* Note that when DIAG = 'U' or 'u' the elements of the array A */
+/* corresponding to the diagonal elements of the matrix are not */
+/* referenced, but are assumed to be unity. */
+/* Unchanged on exit. */
+
+/* LDA - INTEGER. */
+/* On entry, LDA specifies the first dimension of A as declared */
+/* in the calling (sub) program. LDA must be at least */
+/* ( k + 1 ). */
+/* Unchanged on exit. */
+
+/* X - COMPLEX*16 array of dimension at least */
+/* ( 1 + ( n - 1 )*abs( INCX ) ). */
+/* Before entry, the incremented array X must contain the n */
+/* element vector x. On exit, X is overwritten with the */
+/* transformed vector x. */
+
+/* INCX - INTEGER. */
+/* On entry, INCX specifies the increment for the elements of */
+/* X. INCX must not be zero. */
+/* Unchanged on exit. */
+
+/* Further Details */
+/* =============== */
+
+/* Level 2 Blas routine. */
+
+/* -- Written on 22-October-1986. */
+/* Jack Dongarra, Argonne National Lab. */
+/* Jeremy Du Croz, Nag Central Office. */
+/* Sven Hammarling, Nag Central Office. */
+/* Richard Hanson, Sandia National Labs. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+
+/* Test the input parameters. */
+
+ /* Parameter adjustments */
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --x;
+
+ /* Function Body */
+ info = 0;
+ if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
+ ftnlen)1, (ftnlen)1)) {
+ info = 1;
+ } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans,
+ "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
+ ftnlen)1)) {
+ info = 2;
+ } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag,
+ "N", (ftnlen)1, (ftnlen)1)) {
+ info = 3;
+ } else if (*n < 0) {
+ info = 4;
+ } else if (*k < 0) {
+ info = 5;
+ } else if (*lda < *k + 1) {
+ info = 7;
+ } else if (*incx == 0) {
+ info = 9;
+ }
+ if (info != 0) {
+ xerbla_("ZTBMV ", &info, (ftnlen)6);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+ noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
+ nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+
+/* Set up the start point in X if the increment is not unity. This */
+/* will be ( N - 1 )*INCX too small for descending loops. */
+
+ if (*incx <= 0) {
+ kx = 1 - (*n - 1) * *incx;
+ } else if (*incx != 1) {
+ kx = 1;
+ }
+
+/* Start the operations. In this version the elements of A are */
+/* accessed sequentially with one pass through A. */
+
+ if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+
+/* Form x := A*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ if (x[i__2].r != 0. || x[i__2].i != 0.) {
+ i__2 = j;
+ temp.r = x[i__2].r, temp.i = x[i__2].i;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__2 = 1, i__3 = j - *k;
+ i__4 = j - 1;
+ for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
+ i__2 = i__;
+ i__3 = i__;
+ i__5 = l + i__ + j * a_dim1;
+ z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+ z__2.i = temp.r * a[i__5].i + temp.i * a[
+ i__5].r;
+ z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i +
+ z__2.i;
+ x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+/* L10: */
+ }
+ if (nounit) {
+ i__4 = j;
+ i__2 = j;
+ i__3 = kplus1 + j * a_dim1;
+ z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
+ i__3].i, z__1.i = x[i__2].r * a[i__3].i +
+ x[i__2].i * a[i__3].r;
+ x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+ }
+ }
+/* L20: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__4 = jx;
+ if (x[i__4].r != 0. || x[i__4].i != 0.) {
+ i__4 = jx;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ ix = kx;
+ l = kplus1 - j;
+/* Computing MAX */
+ i__4 = 1, i__2 = j - *k;
+ i__3 = j - 1;
+ for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
+ i__4 = ix;
+ i__2 = ix;
+ i__5 = l + i__ + j * a_dim1;
+ z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+ z__2.i = temp.r * a[i__5].i + temp.i * a[
+ i__5].r;
+ z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i +
+ z__2.i;
+ x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+ ix += *incx;
+/* L30: */
+ }
+ if (nounit) {
+ i__3 = jx;
+ i__4 = jx;
+ i__2 = kplus1 + j * a_dim1;
+ z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
+ i__2].i, z__1.i = x[i__4].r * a[i__2].i +
+ x[i__4].i * a[i__2].r;
+ x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+ }
+ }
+ jx += *incx;
+ if (j > *k) {
+ kx += *incx;
+ }
+/* L40: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ i__1 = j;
+ if (x[i__1].r != 0. || x[i__1].i != 0.) {
+ i__1 = j;
+ temp.r = x[i__1].r, temp.i = x[i__1].i;
+ l = 1 - j;
+/* Computing MIN */
+ i__1 = *n, i__3 = j + *k;
+ i__4 = j + 1;
+ for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
+ i__1 = i__;
+ i__3 = i__;
+ i__2 = l + i__ + j * a_dim1;
+ z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+ z__2.i = temp.r * a[i__2].i + temp.i * a[
+ i__2].r;
+ z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i +
+ z__2.i;
+ x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+/* L50: */
+ }
+ if (nounit) {
+ i__4 = j;
+ i__1 = j;
+ i__3 = j * a_dim1 + 1;
+ z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
+ i__3].i, z__1.i = x[i__1].r * a[i__3].i +
+ x[i__1].i * a[i__3].r;
+ x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+ }
+ }
+/* L60: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ i__4 = jx;
+ if (x[i__4].r != 0. || x[i__4].i != 0.) {
+ i__4 = jx;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ ix = kx;
+ l = 1 - j;
+/* Computing MIN */
+ i__4 = *n, i__1 = j + *k;
+ i__3 = j + 1;
+ for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
+ i__4 = ix;
+ i__1 = ix;
+ i__2 = l + i__ + j * a_dim1;
+ z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+ z__2.i = temp.r * a[i__2].i + temp.i * a[
+ i__2].r;
+ z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i +
+ z__2.i;
+ x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+ ix -= *incx;
+/* L70: */
+ }
+ if (nounit) {
+ i__3 = jx;
+ i__4 = jx;
+ i__1 = j * a_dim1 + 1;
+ z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
+ i__1].i, z__1.i = x[i__4].r * a[i__1].i +
+ x[i__4].i * a[i__1].r;
+ x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+ }
+ }
+ jx -= *incx;
+ if (*n - j >= *k) {
+ kx -= *incx;
+ }
+/* L80: */
+ }
+ }
+ }
+ } else {
+
+/* Form x := A'*x or x := conjg( A' )*x. */
+
+ if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
+ kplus1 = *k + 1;
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ i__3 = j;
+ temp.r = x[i__3].r, temp.i = x[i__3].i;
+ l = kplus1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__3 = kplus1 + j * a_dim1;
+ z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+ z__1.i = temp.r * a[i__3].i + temp.i * a[
+ i__3].r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ i__4 = l + i__ + j * a_dim1;
+ i__1 = i__;
+ z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
+ i__1].i, z__2.i = a[i__4].r * x[i__1].i +
+ a[i__4].i * x[i__1].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+/* L90: */
+ }
+ } else {
+ if (nounit) {
+ d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
+ z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+ z__1.i = temp.r * z__2.i + temp.i *
+ z__2.r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__4 = i__;
+ z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i,
+ z__2.i = z__3.r * x[i__4].i + z__3.i * x[
+ i__4].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+/* L100: */
+ }
+ }
+ i__3 = j;
+ x[i__3].r = temp.r, x[i__3].i = temp.i;
+/* L110: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ i__3 = jx;
+ temp.r = x[i__3].r, temp.i = x[i__3].i;
+ kx -= *incx;
+ ix = kx;
+ l = kplus1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__3 = kplus1 + j * a_dim1;
+ z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+ z__1.i = temp.r * a[i__3].i + temp.i * a[
+ i__3].r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ i__4 = l + i__ + j * a_dim1;
+ i__1 = ix;
+ z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
+ i__1].i, z__2.i = a[i__4].r * x[i__1].i +
+ a[i__4].i * x[i__1].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+ ix -= *incx;
+/* L120: */
+ }
+ } else {
+ if (nounit) {
+ d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
+ z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+ z__1.i = temp.r * z__2.i + temp.i *
+ z__2.r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MAX */
+ i__4 = 1, i__1 = j - *k;
+ i__3 = max(i__4,i__1);
+ for (i__ = j - 1; i__ >= i__3; --i__) {
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__4 = ix;
+ z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i,
+ z__2.i = z__3.r * x[i__4].i + z__3.i * x[
+ i__4].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+ ix -= *incx;
+/* L130: */
+ }
+ }
+ i__3 = jx;
+ x[i__3].r = temp.r, x[i__3].i = temp.i;
+ jx -= *incx;
+/* L140: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ i__4 = j;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ l = 1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__4 = j * a_dim1 + 1;
+ z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+ z__1.i = temp.r * a[i__4].i + temp.i * a[
+ i__4].r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ i__1 = l + i__ + j * a_dim1;
+ i__2 = i__;
+ z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+ i__2].i, z__2.i = a[i__1].r * x[i__2].i +
+ a[i__1].i * x[i__2].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+/* L150: */
+ }
+ } else {
+ if (nounit) {
+ d_cnjg(&z__2, &a[j * a_dim1 + 1]);
+ z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+ z__1.i = temp.r * z__2.i + temp.i *
+ z__2.r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__1 = i__;
+ z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i,
+ z__2.i = z__3.r * x[i__1].i + z__3.i * x[
+ i__1].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+/* L160: */
+ }
+ }
+ i__4 = j;
+ x[i__4].r = temp.r, x[i__4].i = temp.i;
+/* L170: */
+ }
+ } else {
+ jx = kx;
+ i__3 = *n;
+ for (j = 1; j <= i__3; ++j) {
+ i__4 = jx;
+ temp.r = x[i__4].r, temp.i = x[i__4].i;
+ kx += *incx;
+ ix = kx;
+ l = 1 - j;
+ if (noconj) {
+ if (nounit) {
+ i__4 = j * a_dim1 + 1;
+ z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+ z__1.i = temp.r * a[i__4].i + temp.i * a[
+ i__4].r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ i__1 = l + i__ + j * a_dim1;
+ i__2 = ix;
+ z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+ i__2].i, z__2.i = a[i__1].r * x[i__2].i +
+ a[i__1].i * x[i__2].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+ ix += *incx;
+/* L180: */
+ }
+ } else {
+ if (nounit) {
+ d_cnjg(&z__2, &a[j * a_dim1 + 1]);
+ z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+ z__1.i = temp.r * z__2.i + temp.i *
+ z__2.r;
+ temp.r = z__1.r, temp.i = z__1.i;
+ }
+/* Computing MIN */
+ i__1 = *n, i__2 = j + *k;
+ i__4 = min(i__1,i__2);
+ for (i__ = j + 1; i__ <= i__4; ++i__) {
+ d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+ i__1 = ix;
+ z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i,
+ z__2.i = z__3.r * x[i__1].i + z__3.i * x[
+ i__1].r;
+ z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+ z__2.i;
+ temp.r = z__1.r, temp.i = z__1.i;
+ ix += *incx;
+/* L190: */
+ }
+ }
+ i__4 = jx;
+ x[i__4].r = temp.r, x[i__4].i = temp.i;
+ jx += *incx;
+/* L200: */
+ }
+ }
+ }
+ }
+
+ return 0;
+
+/* End of ZTBMV . */
+
+} /* ztbmv_ */
+
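The f2c index arithmetic above (l = kplus1 - j; a[l + i__ + j * a_dim1]) is the standard BLAS band-storage mapping with 1-based offsets folded in. As a reading aid, here is a minimal standalone C sketch of the equivalent 0-based mapping; band_index and the 4x4 example are illustrative only, not part of this patch:

    #include <stdio.h>

    /* 0-based flat index of A(i,j), max(0,j-k) <= i <= j, in upper
     * triangular band storage with lda >= k+1; the diagonal lands in
     * row k of the band array. */
    static int band_index(int i, int j, int k, int lda)
    {
        return (k + i - j) + j * lda;
    }

    int main(void)
    {
        int k = 1, lda = 2, n = 4; /* 4x4 upper triangular, one superdiagonal */
        for (int j = 0; j < n; ++j)
            for (int i = (j - k > 0 ? j - k : 0); i <= j; ++i)
                printf("A(%d,%d) -> a[%d]\n", i, j, band_index(i, j, k, lda));
        return 0;
    }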
diff --git a/blas/complexdots.f b/blas/fortran/complexdots.f
index a7da51d16..a7da51d16 100644
--- a/blas/complexdots.f
+++ b/blas/fortran/complexdots.f
diff --git a/blas/lsame.f b/blas/lsame.f
deleted file mode 100644
index f53690268..000000000
--- a/blas/lsame.f
+++ /dev/null
@@ -1,85 +0,0 @@
- LOGICAL FUNCTION LSAME(CA,CB)
-*
-* -- LAPACK auxiliary routine (version 3.1) --
-* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-* November 2006
-*
-* .. Scalar Arguments ..
- CHARACTER CA,CB
-* ..
-*
-* Purpose
-* =======
-*
-* LSAME returns .TRUE. if CA is the same letter as CB regardless of
-* case.
-*
-* Arguments
-* =========
-*
-* CA (input) CHARACTER*1
-*
-* CB (input) CHARACTER*1
-* CA and CB specify the single characters to be compared.
-*
-* =====================================================================
-*
-* .. Intrinsic Functions ..
- INTRINSIC ICHAR
-* ..
-* .. Local Scalars ..
- INTEGER INTA,INTB,ZCODE
-* ..
-*
-* Test if the characters are equal
-*
- LSAME = CA .EQ. CB
- IF (LSAME) RETURN
-*
-* Now test for equivalence if both characters are alphabetic.
-*
- ZCODE = ICHAR('Z')
-*
-* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
-* machines, on which ICHAR returns a value with bit 8 set.
-* ICHAR('A') on Prime machines returns 193 which is the same as
-* ICHAR('A') on an EBCDIC machine.
-*
- INTA = ICHAR(CA)
- INTB = ICHAR(CB)
-*
- IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN
-*
-* ASCII is assumed - ZCODE is the ASCII code of either lower or
-* upper case 'Z'.
-*
- IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32
- IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32
-*
- ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN
-*
-* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
-* upper case 'Z'.
-*
- IF (INTA.GE.129 .AND. INTA.LE.137 .OR.
- + INTA.GE.145 .AND. INTA.LE.153 .OR.
- + INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64
- IF (INTB.GE.129 .AND. INTB.LE.137 .OR.
- + INTB.GE.145 .AND. INTB.LE.153 .OR.
- + INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64
-*
- ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN
-*
-* ASCII is assumed, on Prime machines - ZCODE is the ASCII code
-* plus 128 of either lower or upper case 'Z'.
-*
- IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32
- IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32
- END IF
- LSAME = INTA .EQ. INTB
-*
-* RETURN
-*
-* End of LSAME
-*
- END
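The encoding probe in LSAME (testing ICHAR('Z') to distinguish ASCII, EBCDIC and Prime character sets) predates portable character classification. Assuming a hosted C environment with ctype.h, the whole routine collapses to a single case fold; lsame_c is an illustrative name, not an actual Eigen or BLAS symbol:

    #include <ctype.h>

    /* Case-insensitive comparison of two single characters. */
    static int lsame_c(char ca, char cb)
    {
        return toupper((unsigned char)ca) == toupper((unsigned char)cb);
    }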
diff --git a/blas/srotm.f b/blas/srotm.f
deleted file mode 100644
index fc5a59333..000000000
--- a/blas/srotm.f
+++ /dev/null
@@ -1,148 +0,0 @@
- SUBROUTINE SROTM(N,SX,INCX,SY,INCY,SPARAM)
-* .. Scalar Arguments ..
- INTEGER INCX,INCY,N
-* ..
-* .. Array Arguments ..
- REAL SPARAM(5),SX(*),SY(*)
-* ..
-*
-* Purpose
-* =======
-*
-* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX
-*
-* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN
-* (SY**T)
-*
-* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
-* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY.
-* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*
-* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0
-*
-* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0)
-* H=( ) ( ) ( ) ( )
-* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0).
-* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM.
-*
-*
-* Arguments
-* =========
-*
-* N (input) INTEGER
-* number of elements in input vector(s)
-*
-* SX (input/output) REAL array, dimension N
-* single precision vector with N elements
-*
-* INCX (input) INTEGER
-* storage spacing between elements of SX
-*
-* SY (input/output) REAL array, dimension N
-* single precision vector with N elements
-*
-* INCY (input) INTEGER
-* storage spacing between elements of SY
-*
-* SPARAM (input/output) REAL array, dimension 5
-* SPARAM(1)=SFLAG
-* SPARAM(2)=SH11
-* SPARAM(3)=SH21
-* SPARAM(4)=SH12
-* SPARAM(5)=SH22
-*
-* =====================================================================
-*
-* .. Local Scalars ..
- REAL SFLAG,SH11,SH12,SH21,SH22,TWO,W,Z,ZERO
- INTEGER I,KX,KY,NSTEPS
-* ..
-* .. Data statements ..
- DATA ZERO,TWO/0.E0,2.E0/
-* ..
-*
- SFLAG = SPARAM(1)
- IF (N.LE.0 .OR. (SFLAG+TWO.EQ.ZERO)) GO TO 140
- IF (.NOT. (INCX.EQ.INCY.AND.INCX.GT.0)) GO TO 70
-*
- NSTEPS = N*INCX
- IF (SFLAG) 50,10,30
- 10 CONTINUE
- SH12 = SPARAM(4)
- SH21 = SPARAM(3)
- DO 20 I = 1,NSTEPS,INCX
- W = SX(I)
- Z = SY(I)
- SX(I) = W + Z*SH12
- SY(I) = W*SH21 + Z
- 20 CONTINUE
- GO TO 140
- 30 CONTINUE
- SH11 = SPARAM(2)
- SH22 = SPARAM(5)
- DO 40 I = 1,NSTEPS,INCX
- W = SX(I)
- Z = SY(I)
- SX(I) = W*SH11 + Z
- SY(I) = -W + SH22*Z
- 40 CONTINUE
- GO TO 140
- 50 CONTINUE
- SH11 = SPARAM(2)
- SH12 = SPARAM(4)
- SH21 = SPARAM(3)
- SH22 = SPARAM(5)
- DO 60 I = 1,NSTEPS,INCX
- W = SX(I)
- Z = SY(I)
- SX(I) = W*SH11 + Z*SH12
- SY(I) = W*SH21 + Z*SH22
- 60 CONTINUE
- GO TO 140
- 70 CONTINUE
- KX = 1
- KY = 1
- IF (INCX.LT.0) KX = 1 + (1-N)*INCX
- IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*
- IF (SFLAG) 120,80,100
- 80 CONTINUE
- SH12 = SPARAM(4)
- SH21 = SPARAM(3)
- DO 90 I = 1,N
- W = SX(KX)
- Z = SY(KY)
- SX(KX) = W + Z*SH12
- SY(KY) = W*SH21 + Z
- KX = KX + INCX
- KY = KY + INCY
- 90 CONTINUE
- GO TO 140
- 100 CONTINUE
- SH11 = SPARAM(2)
- SH22 = SPARAM(5)
- DO 110 I = 1,N
- W = SX(KX)
- Z = SY(KY)
- SX(KX) = W*SH11 + Z
- SY(KY) = -W + SH22*Z
- KX = KX + INCX
- KY = KY + INCY
- 110 CONTINUE
- GO TO 140
- 120 CONTINUE
- SH11 = SPARAM(2)
- SH12 = SPARAM(4)
- SH21 = SPARAM(3)
- SH22 = SPARAM(5)
- DO 130 I = 1,N
- W = SX(KX)
- Z = SY(KY)
- SX(KX) = W*SH11 + Z*SH12
- SY(KY) = W*SH21 + Z*SH22
- KX = KX + INCX
- KY = KY + INCY
- 130 CONTINUE
- 140 CONTINUE
- RETURN
- END
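The four SFLAG forms of H documented above can be applied to a single (x, y) pair with one dispatch. This is a minimal C sketch assuming the SPARAM layout from the header (SPARAM(1)=SFLAG, SPARAM(2)=SH11, SPARAM(3)=SH21, SPARAM(4)=SH12, SPARAM(5)=SH22); apply_modified_givens is a hypothetical helper, not reference code:

    static void apply_modified_givens(const float sparam[5], float *x, float *y)
    {
        float sflag = sparam[0], w = *x, z = *y;
        if (sflag == -2.0f) {                     /* H is the identity */
            return;
        } else if (sflag == -1.0f) {              /* full H */
            *x = w * sparam[1] + z * sparam[3];   /* SH11, SH12 */
            *y = w * sparam[2] + z * sparam[4];   /* SH21, SH22 */
        } else if (sflag == 0.0f) {               /* unit diagonal */
            *x = w + z * sparam[3];               /* SH12 */
            *y = w * sparam[2] + z;               /* SH21 */
        } else {                                  /* SFLAG = 1: unit off-diagonal */
            *x = w * sparam[1] + z;               /* SH11 */
            *y = -w + z * sparam[4];              /* SH22 */
        }
    }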
diff --git a/blas/srotmg.f b/blas/srotmg.f
deleted file mode 100644
index 7b3bd4272..000000000
--- a/blas/srotmg.f
+++ /dev/null
@@ -1,208 +0,0 @@
- SUBROUTINE SROTMG(SD1,SD2,SX1,SY1,SPARAM)
-* .. Scalar Arguments ..
- REAL SD1,SD2,SX1,SY1
-* ..
-* .. Array Arguments ..
- REAL SPARAM(5)
-* ..
-*
-* Purpose
-* =======
-*
-* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS
-* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)*
-* SY1)**T.
-* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*
-* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0
-*
-* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0)
-* H=( ) ( ) ( ) ( )
-* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0).
-* LOCATIONS 2-5 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22
-* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE
-* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.)
-*
-* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE
-* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE
-* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM.
-*
-*
-* Arguments
-* =========
-*
-*
-* SD1 (input/output) REAL
-*
-* SD2 (input/output) REAL
-*
-* SX1 (input/output) REAL
-*
-* SY1 (input) REAL
-*
-*
-* SPARAM (input/output) REAL array, dimension 5
-* SPARAM(1)=SFLAG
-* SPARAM(2)=SH11
-* SPARAM(3)=SH21
-* SPARAM(4)=SH12
-* SPARAM(5)=SH22
-*
-* =====================================================================
-*
-* .. Local Scalars ..
- REAL GAM,GAMSQ,ONE,RGAMSQ,SFLAG,SH11,SH12,SH21,SH22,SP1,SP2,SQ1,
- + SQ2,STEMP,SU,TWO,ZERO
- INTEGER IGO
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC ABS
-* ..
-* .. Data statements ..
-*
- DATA ZERO,ONE,TWO/0.E0,1.E0,2.E0/
- DATA GAM,GAMSQ,RGAMSQ/4096.E0,1.67772E7,5.96046E-8/
-* ..
-
- IF (.NOT.SD1.LT.ZERO) GO TO 10
-* GO ZERO-H-D-AND-SX1..
- GO TO 60
- 10 CONTINUE
-* CASE-SD1-NONNEGATIVE
- SP2 = SD2*SY1
- IF (.NOT.SP2.EQ.ZERO) GO TO 20
- SFLAG = -TWO
- GO TO 260
-* REGULAR-CASE..
- 20 CONTINUE
- SP1 = SD1*SX1
- SQ2 = SP2*SY1
- SQ1 = SP1*SX1
-*
- IF (.NOT.ABS(SQ1).GT.ABS(SQ2)) GO TO 40
- SH21 = -SY1/SX1
- SH12 = SP2/SP1
-*
- SU = ONE - SH12*SH21
-*
- IF (.NOT.SU.LE.ZERO) GO TO 30
-* GO ZERO-H-D-AND-SX1..
- GO TO 60
- 30 CONTINUE
- SFLAG = ZERO
- SD1 = SD1/SU
- SD2 = SD2/SU
- SX1 = SX1*SU
-* GO SCALE-CHECK..
- GO TO 100
- 40 CONTINUE
- IF (.NOT.SQ2.LT.ZERO) GO TO 50
-* GO ZERO-H-D-AND-SX1..
- GO TO 60
- 50 CONTINUE
- SFLAG = ONE
- SH11 = SP1/SP2
- SH22 = SX1/SY1
- SU = ONE + SH11*SH22
- STEMP = SD2/SU
- SD2 = SD1/SU
- SD1 = STEMP
- SX1 = SY1*SU
-* GO SCALE-CHECK
- GO TO 100
-* PROCEDURE..ZERO-H-D-AND-SX1..
- 60 CONTINUE
- SFLAG = -ONE
- SH11 = ZERO
- SH12 = ZERO
- SH21 = ZERO
- SH22 = ZERO
-*
- SD1 = ZERO
- SD2 = ZERO
- SX1 = ZERO
-* RETURN..
- GO TO 220
-* PROCEDURE..FIX-H..
- 70 CONTINUE
- IF (.NOT.SFLAG.GE.ZERO) GO TO 90
-*
- IF (.NOT.SFLAG.EQ.ZERO) GO TO 80
- SH11 = ONE
- SH22 = ONE
- SFLAG = -ONE
- GO TO 90
- 80 CONTINUE
- SH21 = -ONE
- SH12 = ONE
- SFLAG = -ONE
- 90 CONTINUE
- GO TO IGO(120,150,180,210)
-* PROCEDURE..SCALE-CHECK
- 100 CONTINUE
- 110 CONTINUE
- IF (.NOT.SD1.LE.RGAMSQ) GO TO 130
- IF (SD1.EQ.ZERO) GO TO 160
- ASSIGN 120 TO IGO
-* FIX-H..
- GO TO 70
- 120 CONTINUE
- SD1 = SD1*GAM**2
- SX1 = SX1/GAM
- SH11 = SH11/GAM
- SH12 = SH12/GAM
- GO TO 110
- 130 CONTINUE
- 140 CONTINUE
- IF (.NOT.SD1.GE.GAMSQ) GO TO 160
- ASSIGN 150 TO IGO
-* FIX-H..
- GO TO 70
- 150 CONTINUE
- SD1 = SD1/GAM**2
- SX1 = SX1*GAM
- SH11 = SH11*GAM
- SH12 = SH12*GAM
- GO TO 140
- 160 CONTINUE
- 170 CONTINUE
- IF (.NOT.ABS(SD2).LE.RGAMSQ) GO TO 190
- IF (SD2.EQ.ZERO) GO TO 220
- ASSIGN 180 TO IGO
-* FIX-H..
- GO TO 70
- 180 CONTINUE
- SD2 = SD2*GAM**2
- SH21 = SH21/GAM
- SH22 = SH22/GAM
- GO TO 170
- 190 CONTINUE
- 200 CONTINUE
- IF (.NOT.ABS(SD2).GE.GAMSQ) GO TO 220
- ASSIGN 210 TO IGO
-* FIX-H..
- GO TO 70
- 210 CONTINUE
- SD2 = SD2/GAM**2
- SH21 = SH21*GAM
- SH22 = SH22*GAM
- GO TO 200
- 220 CONTINUE
- IF (SFLAG) 250,230,240
- 230 CONTINUE
- SPARAM(3) = SH21
- SPARAM(4) = SH12
- GO TO 260
- 240 CONTINUE
- SPARAM(2) = SH11
- SPARAM(5) = SH22
- GO TO 260
- 250 CONTINUE
- SPARAM(2) = SH11
- SPARAM(3) = SH21
- SPARAM(4) = SH12
- SPARAM(5) = SH22
- 260 CONTINUE
- SPARAM(1) = SFLAG
- RETURN
- END
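The SCALE-CHECK block above keeps SD1 and SD2 within [RGAMSQ, GAMSQ] by moving factors of GAM**2 between the D values and the corresponding entries of H and X1; the represented quantity sqrt(SD1)*SX1 is preserved. A small C check with assumed values, mirroring the rescaling at label 120:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float gam = 4096.0f;
        float sd1 = 1e-9f, sx1 = 3.0f;  /* sd1 below RGAMSQ */
        float before = sqrtf(sd1) * sx1;
        sd1 *= gam * gam;               /* as at label 120 */
        sx1 /= gam;
        printf("%g vs %g\n", before, sqrtf(sd1) * sx1); /* same value */
        return 0;
    }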
diff --git a/blas/ssbmv.f b/blas/ssbmv.f
deleted file mode 100644
index 16893a295..000000000
--- a/blas/ssbmv.f
+++ /dev/null
@@ -1,306 +0,0 @@
- SUBROUTINE SSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- REAL ALPHA,BETA
- INTEGER INCX,INCY,K,LDA,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- REAL A(LDA,*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* SSBMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n symmetric band matrix, with k super-diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the band matrix A is being supplied as
-* follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* being supplied.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* being supplied.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry, K specifies the number of super-diagonals of the
-* matrix A. K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* ALPHA - REAL .
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* A - REAL array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the symmetric matrix, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer the upper
-* triangular part of a symmetric band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the symmetric matrix, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer the lower
-* triangular part of a symmetric band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - REAL array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the
-* vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - REAL .
-* On entry, BETA specifies the scalar beta.
-* Unchanged on exit.
-*
-* Y - REAL array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the
-* vector y. On exit, Y is overwritten by the updated vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- REAL ONE,ZERO
- PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-* ..
-* .. Local Scalars ..
- REAL TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (K.LT.0) THEN
- INFO = 3
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 6
- ELSE IF (INCX.EQ.0) THEN
- INFO = 8
- ELSE IF (INCY.EQ.0) THEN
- INFO = 11
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('SSBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array A
-* are accessed sequentially with one pass through A.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when upper triangle of A is stored.
-*
- KPLUS1 = K + 1
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- L = KPLUS1 - J
- DO 50 I = MAX(1,J-K),J - 1
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(I)
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- L = KPLUS1 - J
- DO 70 I = MAX(1,J-K),J - 1
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- IF (J.GT.K) THEN
- KX = KX + INCX
- KY = KY + INCY
- END IF
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when lower triangle of A is stored.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*A(1,J)
- L = 1 - J
- DO 90 I = J + 1,MIN(N,J+K)
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(I)
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*A(1,J)
- L = 1 - J
- IX = JX
- IY = JY
- DO 110 I = J + 1,MIN(N,J+K)
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + A(L+I,J)*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of SSBMV .
-*
- END
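The documented transfer loop for UPLO = 'U' translates directly to 0-based C. A sketch under assumed names (full_to_band_upper is hypothetical): full is n-by-n column-major with leading dimension ldf, band is ldb-by-n with ldb >= k+1:

    static void full_to_band_upper(int n, int k, const float *full, int ldf,
                                   float *band, int ldb)
    {
        for (int j = 0; j < n; ++j) {
            int m = k - j;  /* 0-based shift of Fortran's M = K + 1 - J */
            for (int i = (j - k > 0 ? j - k : 0); i <= j; ++i)
                band[(m + i) + j * ldb] = full[i + j * ldf];
        }
    }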
diff --git a/blas/sspmv.f b/blas/sspmv.f
deleted file mode 100644
index 0b8449824..000000000
--- a/blas/sspmv.f
+++ /dev/null
@@ -1,265 +0,0 @@
- SUBROUTINE SSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- REAL ALPHA,BETA
- INTEGER INCX,INCY,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- REAL AP(*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* SSPMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n symmetric matrix, supplied in packed form.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the matrix A is supplied in the packed
-* array AP as follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* supplied in AP.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* supplied in AP.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* ALPHA - REAL .
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* AP - REAL array of DIMENSION at least
-* ( ( n*( n + 1 ) )/2 ).
-* Before entry with UPLO = 'U' or 'u', the array AP must
-* contain the upper triangular part of the symmetric matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-* and a( 2, 2 ) respectively, and so on.
-* Before entry with UPLO = 'L' or 'l', the array AP must
-* contain the lower triangular part of the symmetric matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-* and a( 3, 1 ) respectively, and so on.
-* Unchanged on exit.
-*
-* X - REAL array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - REAL .
-* On entry, BETA specifies the scalar beta. When BETA is
-* supplied as zero then Y need not be set on input.
-* Unchanged on exit.
-*
-* Y - REAL array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the n
-* element vector y. On exit, Y is overwritten by the updated
-* vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- REAL ONE,ZERO
- PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-* ..
-* .. Local Scalars ..
- REAL TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (INCX.EQ.0) THEN
- INFO = 6
- ELSE IF (INCY.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('SSPMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array AP
-* are accessed sequentially with one pass through AP.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- KK = 1
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when AP contains the upper triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- K = KK
- DO 50 I = 1,J - 1
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(I)
- K = K + 1
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
- KK = KK + J
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- DO 70 K = KK,KK + J - 2
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + J
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when AP contains the lower triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*AP(KK)
- K = KK + 1
- DO 90 I = J + 1,N
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(I)
- K = K + 1
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- KK = KK + (N-J+1)
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*AP(KK)
- IX = JX
- IY = JY
- DO 110 K = KK + 1,KK + N - J
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + AP(K)*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + (N-J+1)
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of SSPMV .
-*
- END
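Packed storage replaces the band arithmetic with a triangular offset: for UPLO = 'U', column j (0-based) of the upper triangle starts j*(j+1)/2 entries into AP. A one-line C sketch with an illustrative name:

    /* 0-based index of A(i,j), i <= j, in upper packed storage. */
    static int packed_upper_index(int i, int j)
    {
        return i + j * (j + 1) / 2;
    }

For instance, A(0,0) maps to ap[0], A(0,1) to ap[1], and A(1,1) to ap[2], matching the AP(1), AP(2), AP(3) description above.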
diff --git a/blas/stbmv.f b/blas/stbmv.f
deleted file mode 100644
index c0b8f1136..000000000
--- a/blas/stbmv.f
+++ /dev/null
@@ -1,335 +0,0 @@
- SUBROUTINE STBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-* .. Scalar Arguments ..
- INTEGER INCX,K,LDA,N
- CHARACTER DIAG,TRANS,UPLO
-* ..
-* .. Array Arguments ..
- REAL A(LDA,*),X(*)
-* ..
-*
-* Purpose
-* =======
-*
-* STBMV performs one of the matrix-vector operations
-*
-* x := A*x, or x := A'*x,
-*
-* where x is an n element vector and A is an n by n unit, or non-unit,
-* upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the matrix is an upper or
-* lower triangular matrix as follows:
-*
-* UPLO = 'U' or 'u' A is an upper triangular matrix.
-*
-* UPLO = 'L' or 'l' A is a lower triangular matrix.
-*
-* Unchanged on exit.
-*
-* TRANS - CHARACTER*1.
-* On entry, TRANS specifies the operation to be performed as
-* follows:
-*
-* TRANS = 'N' or 'n' x := A*x.
-*
-* TRANS = 'T' or 't' x := A'*x.
-*
-* TRANS = 'C' or 'c' x := A'*x.
-*
-* Unchanged on exit.
-*
-* DIAG - CHARACTER*1.
-* On entry, DIAG specifies whether or not A is unit
-* triangular as follows:
-*
-* DIAG = 'U' or 'u' A is assumed to be unit triangular.
-*
-* DIAG = 'N' or 'n' A is not assumed to be unit
-* triangular.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry with UPLO = 'U' or 'u', K specifies the number of
-* super-diagonals of the matrix A.
-* On entry with UPLO = 'L' or 'l', K specifies the number of
-* sub-diagonals of the matrix A.
-* K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* A - REAL array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer an upper
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer a lower
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Note that when DIAG = 'U' or 'u' the elements of the array A
-* corresponding to the diagonal elements of the matrix are not
-* referenced, but are assumed to be unity.
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - REAL array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x. On exit, X is overwritten with the
-* transformed vector x.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- REAL ZERO
- PARAMETER (ZERO=0.0E+0)
-* ..
-* .. Local Scalars ..
- REAL TEMP
- INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
- LOGICAL NOUNIT
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
- + .NOT.LSAME(TRANS,'C')) THEN
- INFO = 2
- ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
- INFO = 3
- ELSE IF (N.LT.0) THEN
- INFO = 4
- ELSE IF (K.LT.0) THEN
- INFO = 5
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 7
- ELSE IF (INCX.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('STBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF (N.EQ.0) RETURN
-*
- NOUNIT = LSAME(DIAG,'N')
-*
-* Set up the start point in X if the increment is not unity. This
-* will be ( N - 1 )*INCX too small for descending loops.
-*
- IF (INCX.LE.0) THEN
- KX = 1 - (N-1)*INCX
- ELSE IF (INCX.NE.1) THEN
- KX = 1
- END IF
-*
-* Start the operations. In this version the elements of A are
-* accessed sequentially with one pass through A.
-*
- IF (LSAME(TRANS,'N')) THEN
-*
-* Form x := A*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 20 J = 1,N
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = KPLUS1 - J
- DO 10 I = MAX(1,J-K),J - 1
- X(I) = X(I) + TEMP*A(L+I,J)
- 10 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
- END IF
- 20 CONTINUE
- ELSE
- JX = KX
- DO 40 J = 1,N
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = KPLUS1 - J
- DO 30 I = MAX(1,J-K),J - 1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX + INCX
- 30 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
- END IF
- JX = JX + INCX
- IF (J.GT.K) KX = KX + INCX
- 40 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 60 J = N,1,-1
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = 1 - J
- DO 50 I = MIN(N,J+K),J + 1,-1
- X(I) = X(I) + TEMP*A(L+I,J)
- 50 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(1,J)
- END IF
- 60 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 80 J = N,1,-1
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = 1 - J
- DO 70 I = MIN(N,J+K),J + 1,-1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX - INCX
- 70 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(1,J)
- END IF
- JX = JX - INCX
- IF ((N-J).GE.K) KX = KX - INCX
- 80 CONTINUE
- END IF
- END IF
- ELSE
-*
-* Form x := A'*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 100 J = N,1,-1
- TEMP = X(J)
- L = KPLUS1 - J
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 90 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(I)
- 90 CONTINUE
- X(J) = TEMP
- 100 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 120 J = N,1,-1
- TEMP = X(JX)
- KX = KX - INCX
- IX = KX
- L = KPLUS1 - J
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 110 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX - INCX
- 110 CONTINUE
- X(JX) = TEMP
- JX = JX - INCX
- 120 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 140 J = 1,N
- TEMP = X(J)
- L = 1 - J
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 130 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(I)
- 130 CONTINUE
- X(J) = TEMP
- 140 CONTINUE
- ELSE
- JX = KX
- DO 160 J = 1,N
- TEMP = X(JX)
- KX = KX + INCX
- IX = KX
- L = 1 - J
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 150 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX + INCX
- 150 CONTINUE
- X(JX) = TEMP
- JX = JX + INCX
- 160 CONTINUE
- END IF
- END IF
- END IF
-*
- RETURN
-*
-* End of STBMV .
-*
- END
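One subtlety in loops 10-20 above: for UPLO = 'U' with TRANS = 'N', the column index J must ascend so that each X(J) is captured in TEMP and consumed by rows I < J before X(J) itself is scaled. A dense 0-based C analogue, ignoring the band truncation and the unit-diagonal case (upper_trmv_inplace is an illustrative name):

    static void upper_trmv_inplace(int n, const float *a, int lda, float *x)
    {
        for (int j = 0; j < n; ++j) {
            float temp = x[j];                 /* old x(j), still intact */
            for (int i = 0; i < j; ++i)
                x[i] += temp * a[i + j * lda]; /* consumes old x(j) */
            x[j] *= a[j + j * lda];            /* non-unit diagonal */
        }
    }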
diff --git a/blas/zhbmv.f b/blas/zhbmv.f
deleted file mode 100644
index bca0da5fc..000000000
--- a/blas/zhbmv.f
+++ /dev/null
@@ -1,310 +0,0 @@
- SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- DOUBLE COMPLEX ALPHA,BETA
- INTEGER INCX,INCY,K,LDA,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- DOUBLE COMPLEX A(LDA,*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* ZHBMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n hermitian band matrix, with k super-diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the band matrix A is being supplied as
-* follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* being supplied.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* being supplied.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry, K specifies the number of super-diagonals of the
-* matrix A. K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* ALPHA - COMPLEX*16 .
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* A - COMPLEX*16 array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the hermitian matrix, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer the upper
-* triangular part of a hermitian band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the hermitian matrix, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer the lower
-* triangular part of a hermitian band matrix from conventional
-* full matrix storage to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Note that the imaginary parts of the diagonal elements need
-* not be set and are assumed to be zero.
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - COMPLEX*16 array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the
-* vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - COMPLEX*16 .
-* On entry, BETA specifies the scalar beta.
-* Unchanged on exit.
-*
-* Y - COMPLEX*16 array of DIMENSION at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the
-* vector y. On exit, Y is overwritten by the updated vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE COMPLEX ONE
- PARAMETER (ONE= (1.0D+0,0.0D+0))
- DOUBLE COMPLEX ZERO
- PARAMETER (ZERO= (0.0D+0,0.0D+0))
-* ..
-* .. Local Scalars ..
- DOUBLE COMPLEX TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC DBLE,DCONJG,MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (K.LT.0) THEN
- INFO = 3
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 6
- ELSE IF (INCX.EQ.0) THEN
- INFO = 8
- ELSE IF (INCY.EQ.0) THEN
- INFO = 11
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('ZHBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array A
-* are accessed sequentially with one pass through A.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when upper triangle of A is stored.
-*
- KPLUS1 = K + 1
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- L = KPLUS1 - J
- DO 50 I = MAX(1,J-K),J - 1
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I)
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- L = KPLUS1 - J
- DO 70 I = MAX(1,J-K),J - 1
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- IF (J.GT.K) THEN
- KX = KX + INCX
- KY = KY + INCY
- END IF
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when lower triangle of A is stored.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*DBLE(A(1,J))
- L = 1 - J
- DO 90 I = J + 1,MIN(N,J+K)
- Y(I) = Y(I) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I)
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*DBLE(A(1,J))
- L = 1 - J
- IX = JX
- IY = JY
- DO 110 I = J + 1,MIN(N,J+K)
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*A(L+I,J)
- TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of ZHBMV .
-*
- END
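The DBLE(A(KPLUS1,J)) and DBLE(A(1,J)) calls above enforce the documented convention that the diagonal of a Hermitian matrix is real: whatever imaginary part is stored there is dropped. A C99 fragment showing the same treatment of one diagonal contribution (diag_term is an illustrative name):

    #include <complex.h>

    /* Contribution temp1 * real(A(j,j)); the stored imaginary part of
     * the diagonal entry is ignored, as in the Fortran source. */
    static double complex diag_term(double complex temp1, double complex ajj)
    {
        return temp1 * creal(ajj);
    }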
diff --git a/blas/zhpmv.f b/blas/zhpmv.f
deleted file mode 100644
index b686108b3..000000000
--- a/blas/zhpmv.f
+++ /dev/null
@@ -1,272 +0,0 @@
- SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-* .. Scalar Arguments ..
- DOUBLE COMPLEX ALPHA,BETA
- INTEGER INCX,INCY,N
- CHARACTER UPLO
-* ..
-* .. Array Arguments ..
- DOUBLE COMPLEX AP(*),X(*),Y(*)
-* ..
-*
-* Purpose
-* =======
-*
-* ZHPMV performs the matrix-vector operation
-*
-* y := alpha*A*x + beta*y,
-*
-* where alpha and beta are scalars, x and y are n element vectors and
-* A is an n by n hermitian matrix, supplied in packed form.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the upper or lower
-* triangular part of the matrix A is supplied in the packed
-* array AP as follows:
-*
-* UPLO = 'U' or 'u' The upper triangular part of A is
-* supplied in AP.
-*
-* UPLO = 'L' or 'l' The lower triangular part of A is
-* supplied in AP.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* ALPHA - COMPLEX*16 .
-* On entry, ALPHA specifies the scalar alpha.
-* Unchanged on exit.
-*
-* AP - COMPLEX*16 array of DIMENSION at least
-* ( ( n*( n + 1 ) )/2 ).
-* Before entry with UPLO = 'U' or 'u', the array AP must
-* contain the upper triangular part of the hermitian matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-* and a( 2, 2 ) respectively, and so on.
-* Before entry with UPLO = 'L' or 'l', the array AP must
-* contain the lower triangular part of the hermitian matrix
-* packed sequentially, column by column, so that AP( 1 )
-* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-* and a( 3, 1 ) respectively, and so on.
-* Note that the imaginary parts of the diagonal elements need
-* not be set and are assumed to be zero.
-* Unchanged on exit.
-*
-* X - COMPLEX*16 array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x.
-* Unchanged on exit.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* BETA - COMPLEX*16 .
-* On entry, BETA specifies the scalar beta. When BETA is
-* supplied as zero then Y need not be set on input.
-* Unchanged on exit.
-*
-* Y - COMPLEX*16 array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCY ) ).
-* Before entry, the incremented array Y must contain the n
-* element vector y. On exit, Y is overwritten by the updated
-* vector y.
-*
-* INCY - INTEGER.
-* On entry, INCY specifies the increment for the elements of
-* Y. INCY must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE COMPLEX ONE
- PARAMETER (ONE= (1.0D+0,0.0D+0))
- DOUBLE COMPLEX ZERO
- PARAMETER (ZERO= (0.0D+0,0.0D+0))
-* ..
-* .. Local Scalars ..
- DOUBLE COMPLEX TEMP1,TEMP2
- INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC DBLE,DCONJG
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (N.LT.0) THEN
- INFO = 2
- ELSE IF (INCX.EQ.0) THEN
- INFO = 6
- ELSE IF (INCY.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('ZHPMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-* Set up the start points in X and Y.
-*
- IF (INCX.GT.0) THEN
- KX = 1
- ELSE
- KX = 1 - (N-1)*INCX
- END IF
- IF (INCY.GT.0) THEN
- KY = 1
- ELSE
- KY = 1 - (N-1)*INCY
- END IF
-*
-* Start the operations. In this version the elements of the array AP
-* are accessed sequentially with one pass through AP.
-*
-* First form y := beta*y.
-*
- IF (BETA.NE.ONE) THEN
- IF (INCY.EQ.1) THEN
- IF (BETA.EQ.ZERO) THEN
- DO 10 I = 1,N
- Y(I) = ZERO
- 10 CONTINUE
- ELSE
- DO 20 I = 1,N
- Y(I) = BETA*Y(I)
- 20 CONTINUE
- END IF
- ELSE
- IY = KY
- IF (BETA.EQ.ZERO) THEN
- DO 30 I = 1,N
- Y(IY) = ZERO
- IY = IY + INCY
- 30 CONTINUE
- ELSE
- DO 40 I = 1,N
- Y(IY) = BETA*Y(IY)
- IY = IY + INCY
- 40 CONTINUE
- END IF
- END IF
- END IF
- IF (ALPHA.EQ.ZERO) RETURN
- KK = 1
- IF (LSAME(UPLO,'U')) THEN
-*
-* Form y when AP contains the upper triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 60 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- K = KK
- DO 50 I = 1,J - 1
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + DCONJG(AP(K))*X(I)
- K = K + 1
- 50 CONTINUE
- Y(J) = Y(J) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2
- KK = KK + J
- 60 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 80 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- IX = KX
- IY = KY
- DO 70 K = KK,KK + J - 2
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX)
- IX = IX + INCX
- IY = IY + INCY
- 70 CONTINUE
- Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + J
- 80 CONTINUE
- END IF
- ELSE
-*
-* Form y when AP contains the lower triangle.
-*
- IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
- DO 100 J = 1,N
- TEMP1 = ALPHA*X(J)
- TEMP2 = ZERO
- Y(J) = Y(J) + TEMP1*DBLE(AP(KK))
- K = KK + 1
- DO 90 I = J + 1,N
- Y(I) = Y(I) + TEMP1*AP(K)
- TEMP2 = TEMP2 + DCONJG(AP(K))*X(I)
- K = K + 1
- 90 CONTINUE
- Y(J) = Y(J) + ALPHA*TEMP2
- KK = KK + (N-J+1)
- 100 CONTINUE
- ELSE
- JX = KX
- JY = KY
- DO 120 J = 1,N
- TEMP1 = ALPHA*X(JX)
- TEMP2 = ZERO
- Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK))
- IX = JX
- IY = JY
- DO 110 K = KK + 1,KK + N - J
- IX = IX + INCX
- IY = IY + INCY
- Y(IY) = Y(IY) + TEMP1*AP(K)
- TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX)
- 110 CONTINUE
- Y(JY) = Y(JY) + ALPHA*TEMP2
- JX = JX + INCX
- JY = JY + INCY
- KK = KK + (N-J+1)
- 120 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of ZHPMV .
-*
- END
diff --git a/blas/ztbmv.f b/blas/ztbmv.f
deleted file mode 100644
index 7c85c1b55..000000000
--- a/blas/ztbmv.f
+++ /dev/null
@@ -1,366 +0,0 @@
- SUBROUTINE ZTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-* .. Scalar Arguments ..
- INTEGER INCX,K,LDA,N
- CHARACTER DIAG,TRANS,UPLO
-* ..
-* .. Array Arguments ..
- DOUBLE COMPLEX A(LDA,*),X(*)
-* ..
-*
-* Purpose
-* =======
-*
-* ZTBMV performs one of the matrix-vector operations
-*
-* x := A*x, or x := A'*x, or x := conjg( A' )*x,
-*
-* where x is an n element vector and A is an n by n unit, or non-unit,
-* upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*
-* Arguments
-* ==========
-*
-* UPLO - CHARACTER*1.
-* On entry, UPLO specifies whether the matrix is an upper or
-* lower triangular matrix as follows:
-*
-* UPLO = 'U' or 'u' A is an upper triangular matrix.
-*
-* UPLO = 'L' or 'l' A is a lower triangular matrix.
-*
-* Unchanged on exit.
-*
-* TRANS - CHARACTER*1.
-* On entry, TRANS specifies the operation to be performed as
-* follows:
-*
-* TRANS = 'N' or 'n' x := A*x.
-*
-* TRANS = 'T' or 't' x := A'*x.
-*
-* TRANS = 'C' or 'c' x := conjg( A' )*x.
-*
-* Unchanged on exit.
-*
-* DIAG - CHARACTER*1.
-* On entry, DIAG specifies whether or not A is unit
-* triangular as follows:
-*
-* DIAG = 'U' or 'u' A is assumed to be unit triangular.
-*
-* DIAG = 'N' or 'n' A is not assumed to be unit
-* triangular.
-*
-* Unchanged on exit.
-*
-* N - INTEGER.
-* On entry, N specifies the order of the matrix A.
-* N must be at least zero.
-* Unchanged on exit.
-*
-* K - INTEGER.
-* On entry with UPLO = 'U' or 'u', K specifies the number of
-* super-diagonals of the matrix A.
-* On entry with UPLO = 'L' or 'l', K specifies the number of
-* sub-diagonals of the matrix A.
-* K must satisfy 0 .le. K.
-* Unchanged on exit.
-*
-* A - COMPLEX*16 array of DIMENSION ( LDA, n ).
-* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-* by n part of the array A must contain the upper triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row
-* ( k + 1 ) of the array, the first super-diagonal starting at
-* position 2 in row k, and so on. The top left k by k triangle
-* of the array A is not referenced.
-* The following program segment will transfer an upper
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = K + 1 - J
-* DO 10, I = MAX( 1, J - K ), J
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-* by n part of the array A must contain the lower triangular
-* band part of the matrix of coefficients, supplied column by
-* column, with the leading diagonal of the matrix in row 1 of
-* the array, the first sub-diagonal starting at position 1 in
-* row 2, and so on. The bottom right k by k triangle of the
-* array A is not referenced.
-* The following program segment will transfer a lower
-* triangular band matrix from conventional full matrix storage
-* to band storage:
-*
-* DO 20, J = 1, N
-* M = 1 - J
-* DO 10, I = J, MIN( N, J + K )
-* A( M + I, J ) = matrix( I, J )
-* 10 CONTINUE
-* 20 CONTINUE
-*
-* Note that when DIAG = 'U' or 'u' the elements of the array A
-* corresponding to the diagonal elements of the matrix are not
-* referenced, but are assumed to be unity.
-* Unchanged on exit.
-*
-* LDA - INTEGER.
-* On entry, LDA specifies the first dimension of A as declared
-* in the calling (sub) program. LDA must be at least
-* ( k + 1 ).
-* Unchanged on exit.
-*
-* X - COMPLEX*16 array of dimension at least
-* ( 1 + ( n - 1 )*abs( INCX ) ).
-* Before entry, the incremented array X must contain the n
-* element vector x. On exit, X is overwritten with the
-* transformed vector x.
-*
-* INCX - INTEGER.
-* On entry, INCX specifies the increment for the elements of
-* X. INCX must not be zero.
-* Unchanged on exit.
-*
-* Further Details
-* ===============
-*
-* Level 2 Blas routine.
-*
-* -- Written on 22-October-1986.
-* Jack Dongarra, Argonne National Lab.
-* Jeremy Du Croz, Nag Central Office.
-* Sven Hammarling, Nag Central Office.
-* Richard Hanson, Sandia National Labs.
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE COMPLEX ZERO
- PARAMETER (ZERO= (0.0D+0,0.0D+0))
-* ..
-* .. Local Scalars ..
- DOUBLE COMPLEX TEMP
- INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
- LOGICAL NOCONJ,NOUNIT
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
-* ..
-* .. External Subroutines ..
- EXTERNAL XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC DCONJG,MAX,MIN
-* ..
-*
-* Test the input parameters.
-*
- INFO = 0
- IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
- INFO = 1
- ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
- + .NOT.LSAME(TRANS,'C')) THEN
- INFO = 2
- ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
- INFO = 3
- ELSE IF (N.LT.0) THEN
- INFO = 4
- ELSE IF (K.LT.0) THEN
- INFO = 5
- ELSE IF (LDA.LT. (K+1)) THEN
- INFO = 7
- ELSE IF (INCX.EQ.0) THEN
- INFO = 9
- END IF
- IF (INFO.NE.0) THEN
- CALL XERBLA('ZTBMV ',INFO)
- RETURN
- END IF
-*
-* Quick return if possible.
-*
- IF (N.EQ.0) RETURN
-*
- NOCONJ = LSAME(TRANS,'T')
- NOUNIT = LSAME(DIAG,'N')
-*
-* Set up the start point in X if the increment is not unity. This
-* will be ( N - 1 )*INCX too small for descending loops.
-*
- IF (INCX.LE.0) THEN
- KX = 1 - (N-1)*INCX
- ELSE IF (INCX.NE.1) THEN
- KX = 1
- END IF
-*
-* Start the operations. In this version the elements of A are
-* accessed sequentially with one pass through A.
-*
- IF (LSAME(TRANS,'N')) THEN
-*
-* Form x := A*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 20 J = 1,N
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = KPLUS1 - J
- DO 10 I = MAX(1,J-K),J - 1
- X(I) = X(I) + TEMP*A(L+I,J)
- 10 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
- END IF
- 20 CONTINUE
- ELSE
- JX = KX
- DO 40 J = 1,N
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = KPLUS1 - J
- DO 30 I = MAX(1,J-K),J - 1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX + INCX
- 30 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
- END IF
- JX = JX + INCX
- IF (J.GT.K) KX = KX + INCX
- 40 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 60 J = N,1,-1
- IF (X(J).NE.ZERO) THEN
- TEMP = X(J)
- L = 1 - J
- DO 50 I = MIN(N,J+K),J + 1,-1
- X(I) = X(I) + TEMP*A(L+I,J)
- 50 CONTINUE
- IF (NOUNIT) X(J) = X(J)*A(1,J)
- END IF
- 60 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 80 J = N,1,-1
- IF (X(JX).NE.ZERO) THEN
- TEMP = X(JX)
- IX = KX
- L = 1 - J
- DO 70 I = MIN(N,J+K),J + 1,-1
- X(IX) = X(IX) + TEMP*A(L+I,J)
- IX = IX - INCX
- 70 CONTINUE
- IF (NOUNIT) X(JX) = X(JX)*A(1,J)
- END IF
- JX = JX - INCX
- IF ((N-J).GE.K) KX = KX - INCX
- 80 CONTINUE
- END IF
- END IF
- ELSE
-*
-* Form x := A'*x or x := conjg( A' )*x.
-*
- IF (LSAME(UPLO,'U')) THEN
- KPLUS1 = K + 1
- IF (INCX.EQ.1) THEN
- DO 110 J = N,1,-1
- TEMP = X(J)
- L = KPLUS1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 90 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(I)
- 90 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J))
- DO 100 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + DCONJG(A(L+I,J))*X(I)
- 100 CONTINUE
- END IF
- X(J) = TEMP
- 110 CONTINUE
- ELSE
- KX = KX + (N-1)*INCX
- JX = KX
- DO 140 J = N,1,-1
- TEMP = X(JX)
- KX = KX - INCX
- IX = KX
- L = KPLUS1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
- DO 120 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX - INCX
- 120 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J))
- DO 130 I = J - 1,MAX(1,J-K),-1
- TEMP = TEMP + DCONJG(A(L+I,J))*X(IX)
- IX = IX - INCX
- 130 CONTINUE
- END IF
- X(JX) = TEMP
- JX = JX - INCX
- 140 CONTINUE
- END IF
- ELSE
- IF (INCX.EQ.1) THEN
- DO 170 J = 1,N
- TEMP = X(J)
- L = 1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 150 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(I)
- 150 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J))
- DO 160 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + DCONJG(A(L+I,J))*X(I)
- 160 CONTINUE
- END IF
- X(J) = TEMP
- 170 CONTINUE
- ELSE
- JX = KX
- DO 200 J = 1,N
- TEMP = X(JX)
- KX = KX + INCX
- IX = KX
- L = 1 - J
- IF (NOCONJ) THEN
- IF (NOUNIT) TEMP = TEMP*A(1,J)
- DO 180 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + A(L+I,J)*X(IX)
- IX = IX + INCX
- 180 CONTINUE
- ELSE
- IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J))
- DO 190 I = J + 1,MIN(N,J+K)
- TEMP = TEMP + DCONJG(A(L+I,J))*X(IX)
- IX = IX + INCX
- 190 CONTINUE
- END IF
- X(JX) = TEMP
- JX = JX + INCX
- 200 CONTINUE
- END IF
- END IF
- END IF
-*
- RETURN
-*
-* End of ZTBMV .
-*
- END
diff --git a/cmake/EigenConfigureTesting.cmake b/cmake/EigenConfigureTesting.cmake
index 0b5de95bb..737f77232 100644
--- a/cmake/EigenConfigureTesting.cmake
+++ b/cmake/EigenConfigureTesting.cmake
@@ -14,49 +14,23 @@ add_dependencies(check buildtests)
# check whether /bin/bash exists
find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH)
-# CMake/Ctest does not allow us to change the build command,
-# so we have to workaround by directly editing the generated DartConfiguration.tcl file
-# save CMAKE_MAKE_PROGRAM
-set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
-# and set a fake one
-set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
-
# This call activates testing and generates the DartConfiguration.tcl
include(CTest)
set(EIGEN_TEST_BUILD_FLAGS "" CACHE STRING "Options passed to the build command of unit tests")
-# overwrite default DartConfiguration.tcl
-# The worarounds are different for each version of the MSVC IDE
-set(EIGEN_TEST_TARGET buildtests)
-if(MSVC_IDE)
- if(CMAKE_MAKE_PROGRAM_SAVE MATCHES "devenv") # devenv
- set(EIGEN_BUILD_COMMAND "${CMAKE_MAKE_PROGRAM_SAVE} Eigen.sln /build Release /project ${EIGEN_TEST_TARGET}")
- else() # msbuild
- set(EIGEN_BUILD_COMMAND "${CMAKE_MAKE_PROGRAM_SAVE} ${EIGEN_TEST_TARGET}.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE}")
- endif()
-
- # append the build flags if provided
- if(NOT "${EIGEN_TEST_BUILD_FLAGS}" MATCHES "^[ \t]*$")
- set(EIGEN_BUILD_COMMAND "${EIGEN_BUILD_COMMAND} ${EIGEN_TEST_BUILD_FLAGS}")
- endif()
-
- # apply the dartconfig hack ...
- set(EIGEN_MAKECOMMAND_PLACEHOLDER "${EIGEN_BUILD_COMMAND}\n#")
-else()
- # for make and nmake
- set(EIGEN_BUILD_COMMAND "${CMAKE_MAKE_PROGRAM_SAVE} ${EIGEN_TEST_TARGET} ${EIGEN_TEST_BUILD_FLAGS}")
- set(EIGEN_MAKECOMMAND_PLACEHOLDER "${EIGEN_BUILD_COMMAND}")
+# Overwrite default DartConfiguration.tcl such that ctest can build our unit tests.
+# Recall that our unit tests are not in the "all" target, so we have to explicitly ask ctest to build our custom 'buildtests' target.
+# At this stage, we can also add custom flags to the build tool through the user defined EIGEN_TEST_BUILD_FLAGS variable.
+file(READ "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" EIGEN_DART_CONFIG_FILE)
+# try to grab the default flags
+string(REGEX MATCH "MakeCommand:.*-- (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE})
+if(NOT CMAKE_MATCH_1)
+string(REGEX MATCH "MakeCommand:.*[^c]make (.*)\nDefaultCTestConfigurationType" EIGEN_DUMMY ${EIGEN_DART_CONFIG_FILE})
endif()
-
-configure_file(${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
-
-# restore default CMAKE_MAKE_PROGRAM
-set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
-
-# un-set temporary variables so that it is like they never existed
-unset(CMAKE_MAKE_PROGRAM_SAVE)
-unset(EIGEN_MAKECOMMAND_PLACEHOLDER)
+string(REGEX REPLACE "MakeCommand:.*DefaultCTestConfigurationType" "MakeCommand: ${CMAKE_COMMAND} --build . --target buildtests --config \"\${CTEST_CONFIGURATION_TYPE}\" -- ${CMAKE_MATCH_1} ${EIGEN_TEST_BUILD_FLAGS}\nDefaultCTestConfigurationType"
+ EIGEN_DART_CONFIG_FILE2 ${EIGEN_DART_CONFIG_FILE})
+file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl" ${EIGEN_DART_CONFIG_FILE2})
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
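To illustrate the rewrite performed above (the exact strings depend on the generator; this example is hypothetical): a default entry such as

  MakeCommand: /usr/bin/make -- -j4

becomes

  MakeCommand: /usr/bin/cmake --build . --target buildtests --config "${CTEST_CONFIGURATION_TYPE}" -- -j4 ${EIGEN_TEST_BUILD_FLAGS}

so that ctest builds the custom 'buildtests' target while preserving the default build flags captured by the regular expressions.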
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 9b9776894..3eb4e4c07 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -282,12 +282,24 @@ macro(ei_testing_print_summary)
message(STATUS "Altivec: Using architecture defaults")
endif()
+ if(EIGEN_TEST_VSX)
+ message(STATUS "VSX: ON")
+ else()
+ message(STATUS "VSX: Using architecture defaults")
+ endif()
+
if(EIGEN_TEST_NEON)
message(STATUS "ARM NEON: ON")
else()
message(STATUS "ARM NEON: Using architecture defaults")
endif()
+ if(EIGEN_TEST_NEON64)
+ message(STATUS "ARMv8 NEON: ON")
+ else()
+ message(STATUS "ARMv8 NEON: Using architecture defaults")
+ endif()
+
endif() # vectorization / alignment options
message(STATUS "\n${EIGEN_TESTING_SUMMARY}")
@@ -418,6 +430,10 @@ macro(ei_get_cxxflags VAR)
ei_is_64bit_env(IS_64BIT_ENV)
if(EIGEN_TEST_NEON)
set(${VAR} NEON)
+ elseif(EIGEN_TEST_NEON64)
+ set(${VAR} NEON)
+ elseif(EIGEN_TEST_VSX)
+ set(${VAR} VSX)
elseif(EIGEN_TEST_ALTIVEC)
set(${VAR} ALVEC)
elseif(EIGEN_TEST_FMA)
@@ -481,20 +497,12 @@ macro(ei_set_build_string)
endmacro(ei_set_build_string)
macro(ei_is_64bit_env VAR)
-
- file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/is64.cpp"
- "int main() { return (sizeof(int*) == 8 ? 1 : 0); }
- ")
- try_run(run_res compile_res
- ${CMAKE_CURRENT_BINARY_DIR} "${CMAKE_CURRENT_BINARY_DIR}/is64.cpp"
- RUN_OUTPUT_VARIABLE run_output)
-
- if(compile_res AND run_res)
- set(${VAR} ${run_res})
- elseif(CMAKE_CL_64)
- set(${VAR} 1)
- elseif("$ENV{Platform}" STREQUAL "X64") # nmake 64 bit
+ if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(${VAR} 1)
+ elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
+ set(${VAR} 0)
+ else()
+ message(WARNING "Unsupported pointer size. Please contact the authors.")
endif()
endmacro(ei_is_64bit_env)
diff --git a/cmake/EigenUninstall.cmake b/cmake/EigenUninstall.cmake
new file mode 100644
index 000000000..4dae8c85c
--- /dev/null
+++ b/cmake/EigenUninstall.cmake
@@ -0,0 +1,40 @@
+################ CMake Uninstall Template #######################
+# CMake Template file for uninstallation of files
+# mentioned in 'install_manifest.txt'
+#
+# Used by uninstall target
+#################################################################
+
+set(MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/install_manifest.txt")
+
+if(EXISTS ${MANIFEST})
+ message(STATUS "============== Uninstalling Eigen ===================")
+
+ file(STRINGS ${MANIFEST} files)
+ foreach(file ${files})
+ if(EXISTS ${file})
+ message(STATUS "Removing file: '${file}'")
+
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} -E remove ${file}
+ OUTPUT_VARIABLE rm_out
+ RESULT_VARIABLE rm_retval
+ )
+
+ if(NOT "${rm_retval}" STREQUAL 0)
+ message(FATAL_ERROR "Failed to remove file: '${file}'.")
+ endif()
+ else()
+ message(STATUS "File '${file}' does not exist.")
+ endif()
+ endforeach(file)
+
+ message(STATUS "========== Finished Uninstalling Eigen ==============")
+else()
+ message(STATUS "Cannot find install manifest: '${MANIFEST}'")
+ message(STATUS "Probably make install has not been performed")
+ message(STATUS " or install_manifest.txt has been deleted.")
+endif()
+
+
+
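A sketch of how this script would typically be wired in (the target definition itself is assumed to live in the top-level CMakeLists.txt; the target name 'uninstall' is an assumption, not part of this file):

  add_custom_target(uninstall
    COMMAND ${CMAKE_COMMAND} -P ${Eigen_SOURCE_DIR}/cmake/EigenUninstall.cmake)

After 'make install' has produced install_manifest.txt in the build directory, running 'make uninstall' then removes every file listed there.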
diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt
index c4d021624..b5bdfa1f4 100644
--- a/doc/AsciiQuickReference.txt
+++ b/doc/AsciiQuickReference.txt
@@ -67,10 +67,10 @@ P.rightCols<cols>() // P(:, end-cols+1:end)
P.rightCols(cols) // P(:, end-cols+1:end)
P.topRows<rows>() // P(1:rows, :)
P.topRows(rows) // P(1:rows, :)
-P.middleRows<rows>(i) // P(:, i+1:i+rows)
-P.middleRows(i, rows) // P(:, i+1:i+rows)
-P.bottomRows<rows>() // P(:, end-rows+1:end)
-P.bottomRows(rows) // P(:, end-rows+1:end)
+P.middleRows<rows>(i) // P(i+1:i+rows, :)
+P.middleRows(i, rows) // P(i+1:i+rows, :)
+P.bottomRows<rows>() // P(end-rows+1:end, :)
+P.bottomRows(rows) // P(end-rows+1:end, :)
P.topLeftCorner(rows, cols) // P(1:rows, 1:cols)
P.topRightCorner(rows, cols) // P(1:rows, end-cols+1:end)
P.bottomLeftCorner(rows, cols) // P(end-rows+1:end, 1:cols)
diff --git a/doc/Manual.dox b/doc/Manual.dox
index 43af857a5..bf1a33229 100644
--- a/doc/Manual.dox
+++ b/doc/Manual.dox
@@ -13,6 +13,7 @@ namespace Eigen {
- \subpage TopicUsingIntelMKL
- \subpage TopicCUDA
- \subpage TopicTemplateKeyword
+ - \subpage TopicNewExpressionType
- \subpage UserManual_UnderstandingEigen
*/
diff --git a/doc/NewExpressionType.dox b/doc/NewExpressionType.dox
new file mode 100644
index 000000000..ad8b7f86b
--- /dev/null
+++ b/doc/NewExpressionType.dox
@@ -0,0 +1,137 @@
+namespace Eigen {
+
+/** \page TopicNewExpressionType Adding a new expression type
+
+This page describes, with the help of an example, how to implement a new
+lightweight expression type in %Eigen. This consists of three parts:
+the expression type itself, a traits class containing compile-time
+information about the expression, and the evaluator class which is
+used to evaluate the expression to a matrix.
+
+\b TO \b DO: Write a page explaining the design, with details on
+vectorization etc., and refer to that page here.
+
+
+\eigenAutoToc
+
+\section TopicSetting The setting
+
+A circulant matrix is a matrix where each column is the same as the
+column to the left, except that it is cyclically shifted downwards.
+For example, here is a 4-by-4 circulant matrix:
+\f[ \begin{bmatrix}
+ 1 & 8 & 4 & 2 \\
+ 2 & 1 & 8 & 4 \\
+ 4 & 2 & 1 & 8 \\
+ 8 & 4 & 2 & 1
+\end{bmatrix} \f]
+A circulant matrix is uniquely determined by its first column. We wish
+to write a function \c makeCirculant which, given the first column,
+returns an expression representing the circulant matrix.
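+
+In index notation, writing \f$ v \f$ for the first column and \f$ n \f$
+for its size, the entries are given (with zero-based indices) by
+\f[ C_{ij} = v_{(i - j) \bmod n}, \f]
+a formula we will meet again when implementing the evaluator below.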
+
+For simplicity, we restrict the \c makeCirculant function to dense
+matrices. It may make sense to also allow arrays or sparse matrices,
+but we will not do so here. We also do not want to support
+vectorization.
+
+
+\section TopicPreamble Getting started
+
+We will present the file implementing the \c makeCirculant function
+part by part. We start by including the appropriate header files and
+forward declaring the expression class, which we will call
+\c Circulant. The \c makeCirculant function will return an object of
+this type. The class \c Circulant is in fact a class template; the
+template argument \c ArgType refers to the type of the vector passed
+to the \c makeCirculant function.
+
+\include make_circulant.cpp.preamble
+
+
+\section TopicTraits The traits class
+
+For every expression class \c X, there should be a traits class
+\c Traits<X> in the \c Eigen::internal namespace containing
+information about \c X known as compile time.
+
+As explained in \ref TopicSetting, we designed the \c Circulant
+expression class to refer to dense matrices. The entries of the
+circulant matrix have the same type as the entries of the vector
+passed to the \c makeCirculant function. The type used to index the
+entries is also the same. Again for simplicity, we will only return
+column-major matrices. Finally, the circulant matrix is a square
+matrix (number of rows equals number of columns), and the number of
+rows equals the number of rows of the column vector passed to the
+\c makeCirculant function. If this is a dynamic-size vector, then the
+size of the circulant matrix is not known at compile-time.
+
+This leads to the following code:
+
+\include make_circulant.cpp.traits
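+
+For example, when \c ArgType is \c VectorXd, the \c Scalar type resolves to
+\c double and all four size constants are \c Dynamic, so the circulant
+expression evaluates to a \c MatrixXd (the combination exercised by the
+\c main function at the end of this page).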
+
+
+\section TopicExpression The expression class
+
+The next step is to define the expression class itself. In our case,
+we want to inherit from \c MatrixBase in order to expose the interface
+for dense matrices. In the constructor, we check that we are passed a
+column vector (see \ref TopicAssertions) and we store the vector from
+which we are going to build the circulant matrix in the member
+variable \c m_arg. Finally, the expression class should compute the
+size of the corresponding circulant matrix. As explained above, this
+is a square matrix with as many rows and columns as there are entries
+in the vector used to construct the matrix.
+
+\b TO \b DO: What about the \c Nested typedef? It seems to be
+necessary; is this only temporary?
+
+\include make_circulant.cpp.expression
+
+
+\section TopicEvaluator The evaluator
+
+The last big fragment implements the evaluator for the \c Circulant
+expression. The evaluator computes the entries of the circulant
+matrix; this is done in the \c .coeff() member function. The entries
+are computed by finding the corresponding entry of the vector from
+which the circulant matrix is constructed. Getting this entry may
+actually be non-trivial when the circulant matrix is constructed from
+a vector which is given by a complicated expression, so we use the
+evaluator which corresponds to the vector.
+
+The \c CoeffReadCost constant records the cost of computing an entry
+of the circulant matrix; we ignore the index computation and say that
+this is the same as the cost of computing an entry of the vector from
+which the circulant matrix is constructed.
+
+In the constructor, we save the evaluator for the column vector which
+defined the circulant matrix. We also save the size of that vector;
+remember that the size can be queried on an expression object but not
+on an evaluator.
+
+\include make_circulant.cpp.evaluator
+
+
+\section TopicEntry The entry point
+
+After all this, the \c makeCirculant function is very simple. It
+simply creates an expression object and returns it.
+
+\include make_circulant.cpp.entry
+
+
+\section TopicMain A simple main function for testing
+
+Finally, a short \c main function that shows how the \c makeCirculant
+function can be called.
+
+\include make_circulant.cpp.main
+
+If all the fragments are combined, the following output is produced,
+showing that the program works as expected:
+
+\verbinclude make_circulant.out
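+
+For the vector \f$ (1, 2, 4, 8) \f$ used in \c main, this is exactly the
+4-by-4 circulant matrix shown in \ref TopicSetting above.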
+
+*/
+}
+
diff --git a/doc/SparseQuickReference.dox b/doc/SparseQuickReference.dox
index 4a33d0cc9..d04ac35c5 100644
--- a/doc/SparseQuickReference.dox
+++ b/doc/SparseQuickReference.dox
@@ -71,11 +71,10 @@ i.e either row major or column major. The default is column major. Most arithmet
<td> Constant or Random Insertion</td>
<td>
\code
-sm1.setZero(); // Set the matrix with zero elements
-sm1.setConstant(val); //Replace all the nonzero values with val
+sm1.setZero();
\endcode
</td>
-<td> The matrix sm1 should have been created before ???</td>
+<td>Remove all non-zero coefficients</td>
</tr>
</table>
diff --git a/doc/examples/make_circulant.cpp b/doc/examples/make_circulant.cpp
new file mode 100644
index 000000000..92e6aaa2b
--- /dev/null
+++ b/doc/examples/make_circulant.cpp
@@ -0,0 +1,11 @@
+/*
+This program is presented in several fragments in the doc page.
+Every fragment is in its own file; this file simply combines them.
+*/
+
+#include "make_circulant.cpp.preamble"
+#include "make_circulant.cpp.traits"
+#include "make_circulant.cpp.expression"
+#include "make_circulant.cpp.evaluator"
+#include "make_circulant.cpp.entry"
+#include "make_circulant.cpp.main"
diff --git a/doc/examples/make_circulant.cpp.entry b/doc/examples/make_circulant.cpp.entry
new file mode 100644
index 000000000..f9d2eb8a9
--- /dev/null
+++ b/doc/examples/make_circulant.cpp.entry
@@ -0,0 +1,5 @@
+template <class ArgType>
+Circulant<ArgType> makeCirculant(const Eigen::MatrixBase<ArgType>& arg)
+{
+ return Circulant<ArgType>(arg.derived());
+}
diff --git a/doc/examples/make_circulant.cpp.evaluator b/doc/examples/make_circulant.cpp.evaluator
new file mode 100644
index 000000000..98713cdc0
--- /dev/null
+++ b/doc/examples/make_circulant.cpp.evaluator
@@ -0,0 +1,33 @@
+namespace Eigen {
+ namespace internal {
+ template<typename ArgType>
+ struct evaluator<Circulant<ArgType> >
+ : evaluator_base<Circulant<ArgType> >
+ {
+ typedef Circulant<ArgType> XprType;
+ typedef typename nested_eval<ArgType, XprType::ColsAtCompileTime>::type ArgTypeNested;
+ typedef typename remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::Index Index;
+
+ enum {
+ CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+ Flags = Eigen::ColMajor
+ };
+
+ evaluator(const XprType& xpr)
+ : m_argImpl(xpr.m_arg), m_rows(xpr.rows())
+ { }
+
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ Index index = row - col;
+ if (index < 0) index += m_rows;
+ return m_argImpl.coeff(index);
+ }
+
+ typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl;
+ const Index m_rows;
+ };
+ }
+}
diff --git a/doc/examples/make_circulant.cpp.expression b/doc/examples/make_circulant.cpp.expression
new file mode 100644
index 000000000..a68bcb730
--- /dev/null
+++ b/doc/examples/make_circulant.cpp.expression
@@ -0,0 +1,20 @@
+template <class ArgType>
+class Circulant : public Eigen::MatrixBase<Circulant<ArgType> >
+{
+public:
+ Circulant(const ArgType& arg)
+ : m_arg(arg)
+ {
+ EIGEN_STATIC_ASSERT(ArgType::ColsAtCompileTime == 1,
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX);
+ }
+
+ typedef typename Eigen::internal::ref_selector<Circulant>::type Nested;
+
+ typedef typename Eigen::internal::traits<Circulant>::Index Index;
+ Index rows() const { return m_arg.rows(); }
+ Index cols() const { return m_arg.rows(); }
+
+ typedef typename Eigen::internal::ref_selector<ArgType>::type ArgTypeNested;
+ ArgTypeNested m_arg;
+};
diff --git a/doc/examples/make_circulant.cpp.main b/doc/examples/make_circulant.cpp.main
new file mode 100644
index 000000000..877f97f62
--- /dev/null
+++ b/doc/examples/make_circulant.cpp.main
@@ -0,0 +1,8 @@
+int main()
+{
+ Eigen::VectorXd vec(4);
+ vec << 1, 2, 4, 8;
+ Eigen::MatrixXd mat;
+ mat = makeCirculant(vec);
+ std::cout << mat << std::endl;
+}
diff --git a/doc/examples/make_circulant.cpp.preamble b/doc/examples/make_circulant.cpp.preamble
new file mode 100644
index 000000000..e575cce14
--- /dev/null
+++ b/doc/examples/make_circulant.cpp.preamble
@@ -0,0 +1,4 @@
+#include <Eigen/Core>
+#include <iostream>
+
+template <class ArgType> class Circulant;
diff --git a/doc/examples/make_circulant.cpp.traits b/doc/examples/make_circulant.cpp.traits
new file mode 100644
index 000000000..f91e43717
--- /dev/null
+++ b/doc/examples/make_circulant.cpp.traits
@@ -0,0 +1,19 @@
+namespace Eigen {
+ namespace internal {
+ template <class ArgType>
+ struct traits<Circulant<ArgType> >
+ {
+ typedef Eigen::Dense StorageKind;
+ typedef Eigen::MatrixXpr XprKind;
+ typedef typename ArgType::Index Index;
+ typedef typename ArgType::Scalar Scalar;
+ enum {
+ Flags = Eigen::ColMajor,
+ RowsAtCompileTime = ArgType::RowsAtCompileTime,
+ ColsAtCompileTime = ArgType::RowsAtCompileTime,
+ MaxRowsAtCompileTime = ArgType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = ArgType::MaxRowsAtCompileTime
+ };
+ };
+ }
+}
diff --git a/failtest/CMakeLists.txt b/failtest/CMakeLists.txt
index 5afa2ac82..d2fea7bdc 100644
--- a/failtest/CMakeLists.txt
+++ b/failtest/CMakeLists.txt
@@ -7,6 +7,9 @@ ei_add_failtest("block_nonconst_ctor_on_const_xpr_1")
ei_add_failtest("block_nonconst_ctor_on_const_xpr_2")
ei_add_failtest("transpose_nonconst_ctor_on_const_xpr")
ei_add_failtest("diagonal_nonconst_ctor_on_const_xpr")
+ei_add_failtest("cwiseunaryview_nonconst_ctor_on_const_xpr")
+ei_add_failtest("triangularview_nonconst_ctor_on_const_xpr")
+ei_add_failtest("selfadjointview_nonconst_ctor_on_const_xpr")
ei_add_failtest("const_qualified_block_method_retval_0")
ei_add_failtest("const_qualified_block_method_retval_1")
@@ -25,6 +28,18 @@ ei_add_failtest("block_on_const_type_actually_const_0")
ei_add_failtest("block_on_const_type_actually_const_1")
ei_add_failtest("transpose_on_const_type_actually_const")
ei_add_failtest("diagonal_on_const_type_actually_const")
+ei_add_failtest("cwiseunaryview_on_const_type_actually_const")
+ei_add_failtest("triangularview_on_const_type_actually_const")
+ei_add_failtest("selfadjointview_on_const_type_actually_const")
+
+ei_add_failtest("ref_1")
+ei_add_failtest("ref_2")
+ei_add_failtest("ref_3")
+ei_add_failtest("ref_4")
+ei_add_failtest("ref_5")
+
+ei_add_failtest("swap_1")
+ei_add_failtest("swap_2")
if (EIGEN_FAILTEST_FAILURE_COUNT)
message(FATAL_ERROR
diff --git a/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp b/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp
new file mode 100644
index 000000000..e23cf8fd8
--- /dev/null
+++ b/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp
@@ -0,0 +1,15 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void foo(CV_QUALIFIER Matrix3d &m){
+ CwiseUnaryView<internal::scalar_real_ref_op<double>,Matrix3d> t(m);
+}
+
+int main() {}
diff --git a/failtest/cwiseunaryview_on_const_type_actually_const.cpp b/failtest/cwiseunaryview_on_const_type_actually_const.cpp
new file mode 100644
index 000000000..fcd41dfdb
--- /dev/null
+++ b/failtest/cwiseunaryview_on_const_type_actually_const.cpp
@@ -0,0 +1,16 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void foo(){
+ MatrixXf m;
+ CwiseUnaryView<internal::scalar_real_ref_op<double>,CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f;
+}
+
+int main() {}
diff --git a/failtest/ref_1.cpp b/failtest/ref_1.cpp
new file mode 100644
index 000000000..8b798d53d
--- /dev/null
+++ b/failtest/ref_1.cpp
@@ -0,0 +1,18 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void call_ref(Ref<VectorXf> a) { }
+
+int main()
+{
+ VectorXf a(10);
+ CV_QUALIFIER VectorXf& ac(a);
+ call_ref(ac);
+}
diff --git a/failtest/ref_2.cpp b/failtest/ref_2.cpp
new file mode 100644
index 000000000..0b779ccf5
--- /dev/null
+++ b/failtest/ref_2.cpp
@@ -0,0 +1,15 @@
+#include "../Eigen/Core"
+
+using namespace Eigen;
+
+void call_ref(Ref<VectorXf> a) { }
+
+int main()
+{
+ MatrixXf A(10,10);
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+ call_ref(A.row(3));
+#else
+ call_ref(A.col(3));
+#endif
+}
diff --git a/failtest/ref_3.cpp b/failtest/ref_3.cpp
new file mode 100644
index 000000000..f46027d48
--- /dev/null
+++ b/failtest/ref_3.cpp
@@ -0,0 +1,15 @@
+#include "../Eigen/Core"
+
+using namespace Eigen;
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+void call_ref(Ref<VectorXf> a) { }
+#else
+void call_ref(const Ref<const VectorXf> &a) { }
+#endif
+
+int main()
+{
+ VectorXf a(10);
+ call_ref(a+a);
+}
diff --git a/failtest/ref_4.cpp b/failtest/ref_4.cpp
new file mode 100644
index 000000000..6c11fa4cb
--- /dev/null
+++ b/failtest/ref_4.cpp
@@ -0,0 +1,15 @@
+#include "../Eigen/Core"
+
+using namespace Eigen;
+
+void call_ref(Ref<MatrixXf,0,OuterStride<> > a) {}
+
+int main()
+{
+ MatrixXf A(10,10);
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+ call_ref(A.transpose());
+#else
+ call_ref(A);
+#endif
+}
diff --git a/failtest/ref_5.cpp b/failtest/ref_5.cpp
new file mode 100644
index 000000000..846d52795
--- /dev/null
+++ b/failtest/ref_5.cpp
@@ -0,0 +1,16 @@
+#include "../Eigen/Core"
+
+using namespace Eigen;
+
+void call_ref(Ref<VectorXf> a) { }
+
+int main()
+{
+ VectorXf a(10);
+ DenseBase<VectorXf> &ac(a);
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+ call_ref(ac);
+#else
+ call_ref(ac.derived());
+#endif
+}
diff --git a/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp b/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp
new file mode 100644
index 000000000..a240f8184
--- /dev/null
+++ b/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp
@@ -0,0 +1,15 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void foo(CV_QUALIFIER Matrix3d &m){
+ SelfAdjointView<Matrix3d,Upper> t(m);
+}
+
+int main() {}
diff --git a/failtest/selfadjointview_on_const_type_actually_const.cpp b/failtest/selfadjointview_on_const_type_actually_const.cpp
new file mode 100644
index 000000000..19aaad6d0
--- /dev/null
+++ b/failtest/selfadjointview_on_const_type_actually_const.cpp
@@ -0,0 +1,16 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void foo(){
+ MatrixXf m;
+ SelfAdjointView<CV_QUALIFIER MatrixXf,Upper>(m).coeffRef(0, 0) = 1.0f;
+}
+
+int main() {}
diff --git a/failtest/swap_1.cpp b/failtest/swap_1.cpp
new file mode 100644
index 000000000..106379720
--- /dev/null
+++ b/failtest/swap_1.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/Core"
+
+using namespace Eigen;
+
+int main()
+{
+ VectorXf a(10), b(10);
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+ const DenseBase<VectorXf> &ac(a);
+#else
+ DenseBase<VectorXf> &ac(a);
+#endif
+ b.swap(ac);
+}
diff --git a/failtest/swap_2.cpp b/failtest/swap_2.cpp
new file mode 100644
index 000000000..c130ba6e4
--- /dev/null
+++ b/failtest/swap_2.cpp
@@ -0,0 +1,14 @@
+#include "../Eigen/Core"
+
+using namespace Eigen;
+
+int main()
+{
+ VectorXf a(10), b(10);
+ VectorXf const &ac(a);
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+ b.swap(ac);
+#else
+ b.swap(ac.const_cast_derived());
+#endif
+}
\ No newline at end of file
diff --git a/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp b/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp
new file mode 100644
index 000000000..807447e4b
--- /dev/null
+++ b/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp
@@ -0,0 +1,15 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void foo(CV_QUALIFIER Matrix3d &m){
+ TriangularView<Matrix3d,Upper> t(m);
+}
+
+int main() {}
diff --git a/failtest/triangularview_on_const_type_actually_const.cpp b/failtest/triangularview_on_const_type_actually_const.cpp
new file mode 100644
index 000000000..0a381a612
--- /dev/null
+++ b/failtest/triangularview_on_const_type_actually_const.cpp
@@ -0,0 +1,16 @@
+#include "../Eigen/Core"
+
+#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
+#define CV_QUALIFIER const
+#else
+#define CV_QUALIFIER
+#endif
+
+using namespace Eigen;
+
+void foo(){
+ MatrixXf m;
+ TriangularView<CV_QUALIFIER MatrixXf,Upper>(m).coeffRef(0, 0) = 1.0f;
+}
+
+int main() {}
diff --git a/lapack/complex_double.cpp b/lapack/complex_double.cpp
index 424d2b8ca..c9c575273 100644
--- a/lapack/complex_double.cpp
+++ b/lapack/complex_double.cpp
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -15,3 +15,4 @@
#include "cholesky.cpp"
#include "lu.cpp"
+#include "svd.cpp"
diff --git a/lapack/complex_single.cpp b/lapack/complex_single.cpp
index c0b2d29ae..6d11b26cd 100644
--- a/lapack/complex_single.cpp
+++ b/lapack/complex_single.cpp
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -15,3 +15,4 @@
#include "cholesky.cpp"
#include "lu.cpp"
+#include "svd.cpp"
diff --git a/lapack/double.cpp b/lapack/double.cpp
index d86549e19..ea78bb662 100644
--- a/lapack/double.cpp
+++ b/lapack/double.cpp
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -15,3 +15,4 @@
#include "cholesky.cpp"
#include "lu.cpp"
#include "eigenvalues.cpp"
+#include "svd.cpp"
diff --git a/lapack/eigenvalues.cpp b/lapack/eigenvalues.cpp
index 6141032ab..921c51569 100644
--- a/lapack/eigenvalues.cpp
+++ b/lapack/eigenvalues.cpp
@@ -7,10 +7,10 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#include "common.h"
+#include "lapack_common.h"
#include <Eigen/Eigenvalues>
-// computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges
+// computes eigenvalues and eigenvectors of a real symmetric N-by-N matrix A
EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info))
{
// TODO exploit the work buffer
@@ -22,24 +22,7 @@ EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Sca
else if(*n<0) *info = -3;
else if(*lda<std::max(1,*n)) *info = -5;
else if((!query_size) && *lwork<std::max(1,3**n-1)) *info = -8;
-
-// if(*info==0)
-// {
-// int nb = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 )
-// LWKOPT = MAX( 1, ( NB+2 )*N )
-// WORK( 1 ) = LWKOPT
-// *
-// IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY )
-// $ INFO = -8
-// END IF
-// *
-// IF( INFO.NE.0 ) THEN
-// CALL XERBLA( 'SSYEV ', -INFO )
-// RETURN
-// ELSE IF( LQUERY ) THEN
-// RETURN
-// END IF
-
+
if(*info!=0)
{
int e = -*info;
diff --git a/lapack/lapack_common.h b/lapack/lapack_common.h
index e558c1409..a93598784 100644
--- a/lapack/lapack_common.h
+++ b/lapack/lapack_common.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -18,6 +18,11 @@
typedef Eigen::Map<Eigen::Transpositions<Eigen::Dynamic,Eigen::Dynamic,int> > PivotsType;
+#if ISCOMPLEX
+#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X,
+#else
+#define EIGEN_LAPACK_ARG_IF_COMPLEX(X)
+#endif
#endif // EIGEN_LAPACK_COMMON_H
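To make the effect of this macro concrete (illustration only, not part of the patch): in complex builds, where ISCOMPLEX is set,

  EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar *rwork)   // expands to: RealScalar *rwork,

while in the real-valued builds the same invocation expands to nothing, so the complex prototypes in lapack/svd.cpp below gain an extra rwork parameter and the real ones omit it entirely.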
diff --git a/lapack/single.cpp b/lapack/single.cpp
index a64ed44e1..c7da3effa 100644
--- a/lapack/single.cpp
+++ b/lapack/single.cpp
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -15,3 +15,4 @@
#include "cholesky.cpp"
#include "lu.cpp"
#include "eigenvalues.cpp"
+#include "svd.cpp"
diff --git a/lapack/svd.cpp b/lapack/svd.cpp
new file mode 100644
index 000000000..df77a371c
--- /dev/null
+++ b/lapack/svd.cpp
@@ -0,0 +1,137 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "lapack_common.h"
+#include <Eigen/SVD>
+
+// computes the singular values/vectors of a general M-by-N matrix A using a divide-and-conquer algorithm
+EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
+ EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info))
+{
+ // TODO exploit the work buffer
+ bool query_size = *lwork==-1;
+ int diag_size = (std::min)(*m,*n);
+
+ *info = 0;
+ if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N') *info = -1;
+ else if(*m<0) *info = -2;
+ else if(*n<0) *info = -3;
+ else if(*lda<std::max(1,*m)) *info = -5;
+ else if(*lda<std::max(1,*m)) *info = -8;
+ else if(*ldu <1 || (*jobz=='A' && *ldu <*m)
+ || (*jobz=='O' && *m<*n && *ldu<*m)) *info = -8;
+ else if(*ldvt<1 || (*jobz=='A' && *ldvt<*n)
+ || (*jobz=='S' && *ldvt<diag_size)
+ || (*jobz=='O' && *m>=*n && *ldvt<*n)) *info = -10;
+
+ if(*info!=0)
+ {
+ int e = -*info;
+ return xerbla_(SCALAR_SUFFIX_UP"GESDD ", &e, 6);
+ }
+
+ if(query_size)
+ {
+ *lwork = 0;
+ return 0;
+ }
+
+ if(*n==0 || *m==0)
+ return 0;
+
+ PlainMatrixType mat(*m,*n);
+ mat = matrix(a,*m,*n,*lda);
+
+ int option = *jobz=='A' ? ComputeFullU|ComputeFullV
+ : *jobz=='S' ? ComputeThinU|ComputeThinV
+ : *jobz=='O' ? ComputeThinU|ComputeThinV
+ : 0;
+
+ BDCSVD<PlainMatrixType> svd(mat,option);
+
+ make_vector(s,diag_size) = svd.singularValues().head(diag_size);
+
+ if(*jobz=='A')
+ {
+ matrix(u,*m,*m,*ldu) = svd.matrixU();
+ matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint();
+ }
+ else if(*jobz=='S')
+ {
+ matrix(u,*m,diag_size,*ldu) = svd.matrixU();
+ matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint();
+ }
+ else if(*jobz=='O' && *m>=*n)
+ {
+ matrix(a,*m,*n,*lda) = svd.matrixU();
+ matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint();
+ }
+ else if(*jobz=='O')
+ {
+ matrix(u,*m,*m,*ldu) = svd.matrixU();
+ matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint();
+ }
+
+ return 0;
+}
+
+// computes the singular values/vectors of a general M-by-N matrix A using a two-sided Jacobi algorithm
+EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
+ EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info))
+{
+ // TODO exploit the work buffer
+ bool query_size = *lwork==-1;
+ int diag_size = (std::min)(*m,*n);
+
+ *info = 0;
+ if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1;
+ else if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N')
+ || (*jobu=='O' && *jobv=='O')) *info = -2;
+ else if(*m<0) *info = -3;
+ else if(*n<0) *info = -4;
+ else if(*lda<std::max(1,*m)) *info = -6;
+ else if(*ldu <1 || ((*jobu=='A' || *jobu=='S') && *ldu<*m)) *info = -9;
+ else if(*ldvt<1 || (*jobv=='A' && *ldvt<*n)
+ || (*jobv=='S' && *ldvt<diag_size)) *info = -11;
+
+ if(*info!=0)
+ {
+ int e = -*info;
+ return xerbla_(SCALAR_SUFFIX_UP"GESVD ", &e, 6);
+ }
+
+ if(query_size)
+ {
+ *lwork = 0;
+ return 0;
+ }
+
+ if(*n==0 || *m==0)
+ return 0;
+
+ PlainMatrixType mat(*m,*n);
+ mat = matrix(a,*m,*n,*lda);
+
+ int option = (*jobu=='A' ? ComputeFullU : *jobu=='S' || *jobu=='O' ? ComputeThinU : 0)
+ | (*jobv=='A' ? ComputeFullV : *jobv=='S' || *jobv=='O' ? ComputeThinV : 0);
+
+ JacobiSVD<PlainMatrixType> svd(mat,option);
+
+ make_vector(s,diag_size) = svd.singularValues().head(diag_size);
+
+ if(*jobu=='A') matrix(u,*m,*m,*ldu) = svd.matrixU();
+ else if(*jobu=='S') matrix(u,*m,diag_size,*ldu) = svd.matrixU();
+ else if(*jobu=='O') matrix(a,*m,diag_size,*lda) = svd.matrixU();
+
+ if(*jobv=='A') matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint();
+ else if(*jobv=='S') matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint();
+ else if(*jobv=='O') matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint();
+
+ return 0;
+}
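A minimal sketch of how the double-precision entry point generated above could be driven from C++. The underscore-suffixed symbol name dgesdd_ and the Fortran calling convention are assumptions following the usual LAPACK binding conventions; this is an illustration, not part of the patch:

// Illustration only. The declaration mirrors the gesdd signature above
// with EIGEN_LAPACK_ARG_IF_COMPLEX expanding to nothing (real case).
extern "C" int dgesdd_(char *jobz, int *m, int *n, double *a, int *lda,
                       double *s, double *u, int *ldu, double *vt, int *ldvt,
                       double *work, int *lwork, int *iwork, int *info);

int main()
{
  int m = 3, n = 2, lda = 3, ldu = 3, ldvt = 2, lwork = 64, info = 0;
  int iwork[16];                       // ignored by this implementation
  double a[6] = {1, 2, 3, 4, 5, 6};    // 3x2 input matrix, column-major
  double s[2], u[9], vt[4], work[64];  // work is ignored too (see TODO above)
  char jobz = 'A';                     // request full U and V^T
  dgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info);
  return info;                         // 0 on success
}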
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 47aefddb8..f57d8ce36 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -139,17 +139,12 @@ endif(TEST_LIB)
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
-option(EIGEN_TEST_EVALUATORS "Enable work in progress evaluators" OFF)
-if(EIGEN_TEST_EVALUATORS)
- add_definitions("-DEIGEN_TEST_EVALUATORS=1")
- add_definitions("-DEIGEN_ENABLE_EVALUATORS=1")
-endif(EIGEN_TEST_EVALUATORS)
-
ei_add_test(meta)
ei_add_test(sizeof)
ei_add_test(dynalloc)
ei_add_test(nomalloc)
ei_add_test(first_aligned)
+ei_add_test(nullary)
ei_add_test(mixingtypes)
ei_add_test(packetmath)
ei_add_test(unalignedassert)
@@ -165,6 +160,9 @@ ei_add_test(redux)
ei_add_test(visitor)
ei_add_test(block)
ei_add_test(corners)
+ei_add_test(swap)
+ei_add_test(resize)
+ei_add_test(conservative_resize)
ei_add_test(product_small)
ei_add_test(product_large)
ei_add_test(product_extra)
@@ -193,6 +191,7 @@ ei_add_test(product_trsolve)
ei_add_test(product_mmtr)
ei_add_test(product_notemporary)
ei_add_test(stable_norm)
+ei_add_test(permutationmatrices)
ei_add_test(bandmatrix)
ei_add_test(cholesky)
ei_add_test(lu)
@@ -212,30 +211,31 @@ ei_add_test(real_qz)
ei_add_test(eigensolver_generalized_real)
ei_add_test(jacobi)
ei_add_test(jacobisvd)
+ei_add_test(bdcsvd)
+ei_add_test(householder)
ei_add_test(geo_orthomethods)
-ei_add_test(geo_homogeneous)
ei_add_test(geo_quaternion)
-ei_add_test(geo_transformations)
ei_add_test(geo_eulerangles)
-ei_add_test(geo_hyperplane)
ei_add_test(geo_parametrizedline)
ei_add_test(geo_alignedbox)
+ei_add_test(geo_hyperplane)
+ei_add_test(geo_transformations)
+ei_add_test(geo_homogeneous)
ei_add_test(stdvector)
ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stddeque)
-ei_add_test(resize)
-ei_add_test(sparse_vector)
ei_add_test(sparse_basic)
+ei_add_test(sparse_vector)
ei_add_test(sparse_product)
ei_add_test(sparse_solvers)
-ei_add_test(umeyama)
-ei_add_test(householder)
-ei_add_test(swap)
-ei_add_test(conservative_resize)
-ei_add_test(permutationmatrices)
ei_add_test(sparse_permutations)
-ei_add_test(nullary)
+ei_add_test(simplicial_cholesky)
+ei_add_test(conjugate_gradient)
+ei_add_test(bicgstab)
+ei_add_test(sparselu)
+ei_add_test(sparseqr)
+ei_add_test(umeyama)
ei_add_test(nesting_ops "${CMAKE_CXX_FLAGS_DEBUG}")
ei_add_test(zerosized)
ei_add_test(dontalign)
@@ -249,13 +249,7 @@ ei_add_test(special_numbers)
ei_add_test(rvalue_types)
ei_add_test(dense_storage)
-ei_add_test(simplicial_cholesky)
-ei_add_test(conjugate_gradient)
-ei_add_test(bicgstab)
-ei_add_test(sparselu)
-ei_add_test(sparseqr)
-
-# ei_add_test(denseLM)
+# # ei_add_test(denseLM)
if(QT4_FOUND)
ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}")
@@ -313,7 +307,7 @@ endif()
option(EIGEN_TEST_NVCC "Enable NVCC support in unit tests" OFF)
if(EIGEN_TEST_NVCC)
-find_package(CUDA)
+find_package(CUDA 5.0)
if(CUDA_FOUND)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
@@ -331,3 +325,6 @@ endif(CUDA_FOUND)
endif(EIGEN_TEST_NVCC)
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests)
+add_test(NAME failtests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests COMMAND ${CMAKE_COMMAND} ${Eigen_SOURCE_DIR} -G "${CMAKE_GENERATOR}" -DEIGEN_FAILTEST=ON)
diff --git a/test/adjoint.cpp b/test/adjoint.cpp
index ea36f7841..3b2a53c91 100644
--- a/test/adjoint.cpp
+++ b/test/adjoint.cpp
@@ -64,6 +64,7 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
Index rows = m.rows();
Index cols = m.cols();
@@ -108,6 +109,17 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
VERIFY_IS_APPROX(m3,m1.transpose());
m3.transposeInPlace();
VERIFY_IS_APPROX(m3,m1);
+
+ if(PacketSize<m3.rows() && PacketSize<m3.cols())
+ {
+ m3 = m1;
+ Index i = internal::random<Index>(0,m3.rows()-PacketSize);
+ Index j = internal::random<Index>(0,m3.cols()-PacketSize);
+ m3.template block<PacketSize,PacketSize>(i,j).transposeInPlace();
+ VERIFY_IS_APPROX( (m3.template block<PacketSize,PacketSize>(i,j)), (m1.template block<PacketSize,PacketSize>(i,j).transpose()) );
+ m3.template block<PacketSize,PacketSize>(i,j).transposeInPlace();
+ VERIFY_IS_APPROX(m3,m1);
+ }
// check inplace adjoint
m3 = m1;
@@ -129,9 +141,19 @@ void test_adjoint()
CALL_SUBTEST_1( adjoint(Matrix<float, 1, 1>()) );
CALL_SUBTEST_2( adjoint(Matrix3d()) );
CALL_SUBTEST_3( adjoint(Matrix4f()) );
+
CALL_SUBTEST_4( adjoint(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
CALL_SUBTEST_5( adjoint(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_6( adjoint(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+
+ // Complement for 128-bit vectorization:
+ CALL_SUBTEST_8( adjoint(Matrix2d()) );
+ CALL_SUBTEST_9( adjoint(Matrix<int,4,4>()) );
+
+ // 256-bit vectorization:
+ CALL_SUBTEST_10( adjoint(Matrix<float,8,8>()) );
+ CALL_SUBTEST_11( adjoint(Matrix<double,4,4>()) );
+ CALL_SUBTEST_12( adjoint(Matrix<int,8,8>()) );
}
// test a large static matrix only once
CALL_SUBTEST_7( adjoint(Matrix<float, 100, 100>()) );
diff --git a/test/array.cpp b/test/array.cpp
index 010fead2d..ac9be097d 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -81,6 +81,31 @@ template<typename ArrayType> void array(const ArrayType& m)
VERIFY_IS_APPROX(m3.rowwise() += rv1, m1.rowwise() + rv1);
m3 = m1;
VERIFY_IS_APPROX(m3.rowwise() -= rv1, m1.rowwise() - rv1);
+
+ // Conversion from scalar
+ VERIFY_IS_APPROX((m3 = s1), ArrayType::Constant(rows,cols,s1));
+ VERIFY_IS_APPROX((m3 = 1), ArrayType::Constant(rows,cols,1));
+ VERIFY_IS_APPROX((m3.topLeftCorner(rows,cols) = 1), ArrayType::Constant(rows,cols,1));
+ typedef Array<Scalar,
+ ArrayType::RowsAtCompileTime==Dynamic?2:ArrayType::RowsAtCompileTime,
+ ArrayType::ColsAtCompileTime==Dynamic?2:ArrayType::ColsAtCompileTime,
+ ArrayType::Options> FixedArrayType;
+ FixedArrayType f1(s1);
+ VERIFY_IS_APPROX(f1, FixedArrayType::Constant(s1));
+ FixedArrayType f2(numext::real(s1));
+ VERIFY_IS_APPROX(f2, FixedArrayType::Constant(numext::real(s1)));
+ FixedArrayType f3((int)100*numext::real(s1));
+ VERIFY_IS_APPROX(f3, FixedArrayType::Constant((int)100*numext::real(s1)));
+ f1.setRandom();
+ FixedArrayType f4(f1.data());
+ VERIFY_IS_APPROX(f4, f1);
+
+ // Check possible conflicts with 1D ctor
+ typedef Array<Scalar, Dynamic, 1> OneDArrayType;
+ OneDArrayType o1(rows);
+ VERIFY(o1.size()==rows);
+ OneDArrayType o4((int)rows);
+ VERIFY(o4.size()==rows);
}
template<typename ArrayType> void comparisons(const ArrayType& m)
diff --git a/test/bdcsvd.cpp b/test/bdcsvd.cpp
new file mode 100644
index 000000000..52a02b697
--- /dev/null
+++ b/test/bdcsvd.cpp
@@ -0,0 +1,111 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
+// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
+// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
+// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/
+
+// discard stack allocation as that too bypasses malloc
+#define EIGEN_STACK_ALLOCATION_LIMIT 0
+#define EIGEN_RUNTIME_NO_MALLOC
+
+#include "main.h"
+#include <Eigen/SVD>
+#include <iostream>
+#include <Eigen/LU>
+
+
+#define SVD_DEFAULT(M) BDCSVD<M>
+#define SVD_FOR_MIN_NORM(M) BDCSVD<M>
+#include "svd_common.h"
+
+// Check all variants of BDCSVD
+template<typename MatrixType>
+void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
+{
+ MatrixType m = a;
+ if(pickrandom)
+ svd_fill_random(m);
+
+ CALL_SUBTEST(( svd_test_all_computation_options<BDCSVD<MatrixType> >(m, false) ));
+}
+
+template<typename MatrixType>
+void bdcsvd_method()
+{
+ enum { Size = MatrixType::RowsAtCompileTime };
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef Matrix<RealScalar, Size, 1> RealVecType;
+ MatrixType m = MatrixType::Identity();
+ VERIFY_IS_APPROX(m.bdcSvd().singularValues(), RealVecType::Ones());
+ VERIFY_RAISES_ASSERT(m.bdcSvd().matrixU());
+ VERIFY_RAISES_ASSERT(m.bdcSvd().matrixV());
+ VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).solve(m), m);
+}
+
+// compare the singular values returned by JacobiSVD and BDCSVD
+template<typename MatrixType>
+void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0)
+{
+ MatrixType m = MatrixType::Random(a.rows(), a.cols());
+ BDCSVD<MatrixType> bdc_svd(m);
+ JacobiSVD<MatrixType> jacobi_svd(m);
+ VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues());
+ if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
+ if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
+ if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
+ if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
+}
+
+void test_bdcsvd()
+{
+ CALL_SUBTEST_3(( svd_verify_assert<BDCSVD<Matrix3f> >(Matrix3f()) ));
+ CALL_SUBTEST_4(( svd_verify_assert<BDCSVD<Matrix4d> >(Matrix4d()) ));
+ CALL_SUBTEST_7(( svd_verify_assert<BDCSVD<MatrixXf> >(MatrixXf(10,12)) ));
+ CALL_SUBTEST_8(( svd_verify_assert<BDCSVD<MatrixXcd> >(MatrixXcd(7,5)) ));
+
+ CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd<Matrix2cd>) ));
+ CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd<Matrix2d>) ));
+
+ for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
+ CALL_SUBTEST_4(( bdcsvd<Matrix4d>() ));
+ CALL_SUBTEST_5(( bdcsvd<Matrix<float,3,5> >() ));
+
+ int r = internal::random<int>(1, EIGEN_TEST_MAX_SIZE/2),
+ c = internal::random<int>(1, EIGEN_TEST_MAX_SIZE/2);
+
+ TEST_SET_BUT_UNUSED_VARIABLE(r)
+ TEST_SET_BUT_UNUSED_VARIABLE(c)
+
+ CALL_SUBTEST_6(( bdcsvd(Matrix<double,Dynamic,2>(r,2)) ));
+ CALL_SUBTEST_7(( bdcsvd(MatrixXf(r,c)) ));
+ CALL_SUBTEST_7(( compare_bdc_jacobi(MatrixXf(r,c)) ));
+ CALL_SUBTEST_10(( bdcsvd(MatrixXd(r,c)) ));
+ CALL_SUBTEST_10(( compare_bdc_jacobi(MatrixXd(r,c)) ));
+ CALL_SUBTEST_8(( bdcsvd(MatrixXcd(r,c)) ));
+ CALL_SUBTEST_8(( compare_bdc_jacobi(MatrixXcd(r,c)) ));
+
+ // Test on inf/nan matrix
+ CALL_SUBTEST_7( (svd_inf_nan<BDCSVD<MatrixXf>, MatrixXf>()) );
+ CALL_SUBTEST_10( (svd_inf_nan<BDCSVD<MatrixXd>, MatrixXd>()) );
+ }
+
+ // test matrixbase method
+ CALL_SUBTEST_1(( bdcsvd_method<Matrix2cd>() ));
+ CALL_SUBTEST_3(( bdcsvd_method<Matrix3f>() ));
+
+ // Test problem size constructors
+ CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) );
+
+ // Check that preallocation avoids subsequent mallocs
+ CALL_SUBTEST_9( svd_preallocate() );
+
+ CALL_SUBTEST_2( svd_underoverflow() );
+}
+
diff --git a/test/block.cpp b/test/block.cpp
index 269acd28e..3b77b704a 100644
--- a/test/block.cpp
+++ b/test/block.cpp
@@ -130,6 +130,14 @@ template<typename MatrixType> void block(const MatrixType& m)
VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows));
VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols));
+
+ // check that linear accessors work on blocks
+ m1 = m1_copy;
+ if((MatrixType::Flags&RowMajorBit)==0)
+ VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1));
+ else
+ VERIFY_IS_EQUAL(m1.topRows(r1).coeff(c1+r1*cols), m1(r1,c1));
+
// now test some block-inside-of-block.
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index a883192ab..33e32a322 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -316,33 +316,35 @@ template<typename MatrixType> void cholesky_definiteness(const MatrixType& m)
{
eigen_assert(m.rows() == 2 && m.cols() == 2);
MatrixType mat;
+ LDLT<MatrixType> ldlt(2);
+
{
mat << 1, 0, 0, -1;
- LDLT<MatrixType> ldlt(mat);
+ ldlt.compute(mat);
VERIFY(!ldlt.isNegative());
VERIFY(!ldlt.isPositive());
}
{
mat << 1, 2, 2, 1;
- LDLT<MatrixType> ldlt(mat);
+ ldlt.compute(mat);
VERIFY(!ldlt.isNegative());
VERIFY(!ldlt.isPositive());
}
{
mat << 0, 0, 0, 0;
- LDLT<MatrixType> ldlt(mat);
+ ldlt.compute(mat);
VERIFY(ldlt.isNegative());
VERIFY(ldlt.isPositive());
}
{
mat << 0, 0, 0, 1;
- LDLT<MatrixType> ldlt(mat);
+ ldlt.compute(mat);
VERIFY(!ldlt.isNegative());
VERIFY(ldlt.isPositive());
}
{
mat << -1, 0, 0, 0;
- LDLT<MatrixType> ldlt(mat);
+ ldlt.compute(mat);
VERIFY(ldlt.isNegative());
VERIFY(!ldlt.isPositive());
}
diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu
index 4c7e96c10..300bced02 100644
--- a/test/cuda_basic.cu
+++ b/test/cuda_basic.cu
@@ -65,7 +65,7 @@ struct redux {
};
template<typename T1, typename T2>
-struct prod {
+struct prod_test {
EIGEN_DEVICE_FUNC
void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
{
@@ -125,8 +125,8 @@ void test_cuda_basic()
CALL_SUBTEST( run_and_compare_to_cuda(redux<Array4f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_cuda(redux<Matrix3f>(), nthreads, in, out) );
- CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix3f,Matrix3f>(), nthreads, in, out) );
- CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix4f,Vector4f>(), nthreads, in, out) );
+ CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) );
+ CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );
diff --git a/test/diagonalmatrices.cpp b/test/diagonalmatrices.cpp
index 149f1db2f..0227ba577 100644
--- a/test/diagonalmatrices.cpp
+++ b/test/diagonalmatrices.cpp
@@ -84,6 +84,13 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
VERIFY_IS_APPROX(m1 * (rdm1 * s1), (m1 * rdm1) * s1);
VERIFY_IS_APPROX(m1 * (s1 * rdm1), (m1 * rdm1) * s1);
+
+ // Diagonal to dense
+ sq_m1.setRandom();
+ sq_m2 = sq_m1;
+ VERIFY_IS_APPROX( (sq_m1 += (s1*v1).asDiagonal()), sq_m2 += (s1*v1).asDiagonal().toDenseMatrix() );
+ VERIFY_IS_APPROX( (sq_m1 -= (s1*v1).asDiagonal()), sq_m2 -= (s1*v1).asDiagonal().toDenseMatrix() );
+ VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() );
}
void test_diagonalmatrices()
diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp
index 3851f9df2..935736328 100644
--- a/test/eigensolver_selfadjoint.cpp
+++ b/test/eigensolver_selfadjoint.cpp
@@ -111,8 +111,17 @@ template<typename MatrixType> void selfadjointeigensolver(const MatrixType& m)
// test Tridiagonalization's methods
Tridiagonalization<MatrixType> tridiag(symmC);
- // FIXME tridiag.matrixQ().adjoint() does not work
+ VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal());
+ VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>());
+ MatrixType T = tridiag.matrixT();
+ if(rows>1 && cols>1) {
+ // FIXME check that upper and lower part are 0:
+ //VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView<Upper>().isZero());
+ }
+ VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal().real());
+ VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>().real());
VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView<Lower>()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint());
+ VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView<Lower>()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint());
// Test computation of eigenvalues from tridiagonal matrix
if(rows > 1)
@@ -136,11 +145,14 @@ void test_eigensolver_selfadjoint()
{
int s = 0;
for(int i = 0; i < g_repeat; i++) {
+ // trivial test for 1x1 matrices:
+ CALL_SUBTEST_1( selfadjointeigensolver(Matrix<float, 1, 1>()));
+ CALL_SUBTEST_1( selfadjointeigensolver(Matrix<double, 1, 1>()));
// very important to test 3x3 and 2x2 matrices since we provide special paths for them
- CALL_SUBTEST_1( selfadjointeigensolver(Matrix2f()) );
- CALL_SUBTEST_1( selfadjointeigensolver(Matrix2d()) );
- CALL_SUBTEST_1( selfadjointeigensolver(Matrix3f()) );
- CALL_SUBTEST_1( selfadjointeigensolver(Matrix3d()) );
+ CALL_SUBTEST_12( selfadjointeigensolver(Matrix2f()) );
+ CALL_SUBTEST_12( selfadjointeigensolver(Matrix2d()) );
+ CALL_SUBTEST_13( selfadjointeigensolver(Matrix3f()) );
+ CALL_SUBTEST_13( selfadjointeigensolver(Matrix3d()) );
CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) );
s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) );
diff --git a/test/evaluators.cpp b/test/evaluators.cpp
index e3922c1be..f41968da8 100644
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@@ -1,7 +1,78 @@
-#define EIGEN_ENABLE_EVALUATORS
+
#include "main.h"
-using internal::copy_using_evaluator;
+namespace Eigen {
+
+ template<typename DstXprType, typename SrcXprType>
+ EIGEN_STRONG_INLINE
+ DstXprType& copy_using_evaluator(const EigenBase<DstXprType> &dst, const SrcXprType &src)
+ {
+ call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+ return dst.const_cast_derived();
+ }
+
+ template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
+ EIGEN_STRONG_INLINE
+ const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, const SrcXprType &src)
+ {
+ call_assignment(dst, src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+ return dst.expression();
+ }
+
+ template<typename DstXprType, typename SrcXprType>
+ EIGEN_STRONG_INLINE
+ DstXprType& copy_using_evaluator(const PlainObjectBase<DstXprType> &dst, const SrcXprType &src)
+ {
+ #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+ eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
+ : (dst.rows() == src.rows() && dst.cols() == src.cols())))
+ && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+ #else
+ dst.const_cast_derived().resizeLike(src.derived());
+ #endif
+
+ call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+ return dst.const_cast_derived();
+ }
+
+ template<typename DstXprType, typename SrcXprType>
+ void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+ {
+ typedef typename DstXprType::Scalar Scalar;
+ call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::add_assign_op<Scalar>());
+ }
+
+ template<typename DstXprType, typename SrcXprType>
+ void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+ {
+ typedef typename DstXprType::Scalar Scalar;
+ call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::sub_assign_op<Scalar>());
+ }
+
+ template<typename DstXprType, typename SrcXprType>
+ void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+ {
+ typedef typename DstXprType::Scalar Scalar;
+ call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op<Scalar>());
+ }
+
+ template<typename DstXprType, typename SrcXprType>
+ void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+ {
+ typedef typename DstXprType::Scalar Scalar;
+ call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op<Scalar>());
+ }
+
+ template<typename DstXprType, typename SrcXprType>
+ void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+ {
+ typedef typename DstXprType::Scalar Scalar;
+ call_assignment(dst.const_cast_derived(), src.const_cast_derived(), internal::swap_assign_op<Scalar>());
+ }
+
+}
+
+
using namespace std;
#define VERIFY_IS_APPROX_EVALUATOR(DEST,EXPR) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (EXPR).eval());
@@ -72,8 +143,19 @@ void test_evaluators()
c = a*a;
copy_using_evaluator(a, prod(a,a));
VERIFY_IS_APPROX(a,c);
+
+ // check compound assignment of products
+ d = c;
+ add_assign_using_evaluator(c.noalias(), prod(a,b));
+ d.noalias() += a*b;
+ VERIFY_IS_APPROX(c, d);
+
+ d = c;
+ subtract_assign_using_evaluator(c.noalias(), prod(a,b));
+ d.noalias() -= a*b;
+ VERIFY_IS_APPROX(c, d);
}
-
+
{
// test product with all possible sizes
int s = internal::random<int>(1,100);
@@ -124,7 +206,7 @@ void test_evaluators()
// this does not work because Random is eval-before-nested:
// copy_using_evaluator(w, Vector2d::Random().transpose());
-
+
// test CwiseUnaryOp
VERIFY_IS_APPROX_EVALUATOR(v2, 3 * v);
VERIFY_IS_APPROX_EVALUATOR(w, (3 * v).transpose());
@@ -327,4 +409,56 @@ void test_evaluators()
arr_ref.row(1) /= (arr_ref.row(2) + 1);
VERIFY_IS_APPROX(arr, arr_ref);
}
+
+ {
+ // test triangular shapes
+ MatrixXd A = MatrixXd::Random(6,6), B(6,6), C(6,6), D(6,6);
+ A.setRandom();B.setRandom();
+ VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView<Upper>(), MatrixXd(A.triangularView<Upper>()));
+
+ A.setRandom();B.setRandom();
+ VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView<UnitLower>(), MatrixXd(A.triangularView<UnitLower>()));
+
+ A.setRandom();B.setRandom();
+ VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView<UnitUpper>(), MatrixXd(A.triangularView<UnitUpper>()));
+
+ A.setRandom();B.setRandom();
+ C = B; C.triangularView<Upper>() = A;
+ copy_using_evaluator(B.triangularView<Upper>(), A);
+ VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView<Upper>(), A)");
+
+ A.setRandom();B.setRandom();
+ C = B; C.triangularView<Lower>() = A.triangularView<Lower>();
+ copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Lower>());
+ VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Lower>())");
+
+
+ A.setRandom();B.setRandom();
+ C = B; C.triangularView<Lower>() = A.triangularView<Upper>().transpose();
+ copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Upper>().transpose());
+ VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Lower>().transpose())");
+
+
+ A.setRandom();B.setRandom(); C = B; D = A;
+ C.triangularView<Upper>().swap(D.triangularView<Upper>());
+ swap_using_evaluator(B.triangularView<Upper>(), A.triangularView<Upper>());
+ VERIFY(B.isApprox(C) && "swap_using_evaluator(B.triangularView<Upper>(), A.triangularView<Upper>())");
+
+
+ VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.triangularView<Upper>(),A), MatrixXd(A.triangularView<Upper>()*A));
+
+ VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.selfadjointView<Upper>(),A), MatrixXd(A.selfadjointView<Upper>()*A));
+
+ }
+
+ {
+ // test diagonal shapes
+ VectorXd d = VectorXd::Random(6);
+ MatrixXd A = MatrixXd::Random(6,6), B(6,6);
+ A.setRandom();B.setRandom();
+
+ VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(d.asDiagonal(),A), MatrixXd(d.asDiagonal()*A));
+ VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(A,d.asDiagonal()), MatrixXd(A*d.asDiagonal()));
+
+ }
}
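
The compound-assignment checks above pair each `*_assign_using_evaluator` helper with its public-API counterpart. A minimal sketch of that public path, assuming only Eigen's stock `noalias()` interface:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd a = MatrixXd::Random(4,4), b = MatrixXd::Random(4,4);
      MatrixXd c = MatrixXd::Random(4,4), d = c;
      // noalias() promises the destination does not alias the product,
      // so the evaluator can accumulate directly without a temporary.
      c.noalias() += a * b;
      d += (a * b).eval();            // same result via an explicit temporary
      return c.isApprox(d) ? 0 : 1;
    }
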
diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp
index c91bde819..2f9d18c0f 100644
--- a/test/geo_homogeneous.cpp
+++ b/test/geo_homogeneous.cpp
@@ -38,6 +38,10 @@ template<typename Scalar,int Size> void homogeneous(void)
hv0 << v0, 1;
VERIFY_IS_APPROX(v0.homogeneous(), hv0);
VERIFY_IS_APPROX(v0, hv0.hnormalized());
+
+ VERIFY_IS_APPROX(v0.homogeneous().sum(), hv0.sum());
+ VERIFY_IS_APPROX(v0.homogeneous().minCoeff(), hv0.minCoeff());
+ VERIFY_IS_APPROX(v0.homogeneous().maxCoeff(), hv0.maxCoeff());
hm0 << m0, ones.transpose();
VERIFY_IS_APPROX(m0.colwise().homogeneous(), hm0);
@@ -57,7 +61,6 @@ template<typename Scalar,int Size> void homogeneous(void)
VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2,
v0.transpose().rowwise().homogeneous() * t2);
- m0.transpose().rowwise().homogeneous().eval();
VERIFY_IS_APPROX((m0.transpose().rowwise().homogeneous().eval()) * t2,
m0.transpose().rowwise().homogeneous() * t2);
@@ -82,7 +85,7 @@ template<typename Scalar,int Size> void homogeneous(void)
VERIFY_IS_APPROX(aff * pts.colwise().homogeneous(), (aff * pts1).colwise().hnormalized());
VERIFY_IS_APPROX(caff * pts.colwise().homogeneous(), (caff * pts1).colwise().hnormalized());
VERIFY_IS_APPROX(proj * pts.colwise().homogeneous(), (proj * pts1));
-
+
VERIFY_IS_APPROX((aff * pts1).colwise().hnormalized(), aff * pts);
VERIFY_IS_APPROX((caff * pts1).colwise().hnormalized(), caff * pts);
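
The new reduction checks rely on `homogeneous()` appending a unit coefficient that `sum()`, `minCoeff()` and `maxCoeff()` must see. A small self-contained illustration using the standard Geometry-module API:

    #include <Eigen/Dense>
    #include <Eigen/Geometry>
    #include <cmath>
    using namespace Eigen;

    int main()
    {
      Vector3d v = Vector3d::Random();
      Vector4d hv = v.homogeneous();          // [v; 1]
      // hnormalized() divides by the last coefficient and drops it,
      // so the round trip recovers v, and sum() sees the appended 1.
      bool ok = hv.hnormalized().isApprox(v)
             && std::abs(hv.sum() - (v.sum() + 1.0)) < 1e-12;
      return ok ? 0 : 1;
    }
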
diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp
index ed5928f10..aa744a3ea 100644
--- a/test/geo_hyperplane.cpp
+++ b/test/geo_hyperplane.cpp
@@ -124,6 +124,33 @@ template<typename Scalar> void lines()
}
}
+template<typename Scalar> void planes()
+{
+ using std::abs;
+ typedef Hyperplane<Scalar, 3> Plane;
+ typedef Matrix<Scalar,3,1> Vector;
+ typedef Matrix<Scalar,4,1> CoeffsType;
+
+ for(int i = 0; i < 10; i++)
+ {
+ Vector v0 = Vector::Random();
+ Vector v1(v0), v2(v0);
+ if(internal::random<double>(0,1)>0.25)
+ v1 += Vector::Random();
+ if(internal::random<double>(0,1)>0.25)
+ v2 += v1 * std::pow(internal::random<Scalar>(0,1),internal::random<int>(1,16));
+ if(internal::random<double>(0,1)>0.25)
+ v2 += Vector::Random() * std::pow(internal::random<Scalar>(0,1),internal::random<int>(1,16));
+
+ Plane p0 = Plane::Through(v0, v1, v2);
+
+ VERIFY_IS_APPROX(p0.normal().norm(), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN(p0.absDistance(v0), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN(p0.absDistance(v1), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN(p0.absDistance(v2), Scalar(1));
+ }
+}
+
template<typename Scalar> void hyperplane_alignment()
{
typedef Hyperplane<Scalar,3,AutoAlign> Plane3a;
@@ -163,5 +190,7 @@ void test_geo_hyperplane()
CALL_SUBTEST_4( hyperplane(Hyperplane<std::complex<double>,5>()) );
CALL_SUBTEST_1( lines<float>() );
CALL_SUBTEST_3( lines<double>() );
+ CALL_SUBTEST_2( planes<float>() );
+ CALL_SUBTEST_5( planes<double>() );
}
}
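
`planes()` stresses `Hyperplane::Through` with point triples that are nearly coincident or collinear. The invariants it verifies look like this in isolation (a sketch; the 1e-8 perturbation is an arbitrary choice):

    #include <Eigen/Dense>
    #include <Eigen/Geometry>
    #include <iostream>
    using namespace Eigen;

    int main()
    {
      Vector3d v0 = Vector3d::Random();
      Vector3d v1 = v0 + Vector3d::Random();
      Vector3d v2 = v0 + 1e-8 * Vector3d::Random(); // nearly coincident with v0
      Hyperplane<double,3> p = Hyperplane<double,3>::Through(v0, v1, v2);
      // The plane must keep a unit normal and contain all three points,
      // even for such an ill-conditioned triple.
      std::cout << p.normal().norm() << " " << p.absDistance(v2) << "\n";
      return 0;
    }
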
diff --git a/test/geo_orthomethods.cpp b/test/geo_orthomethods.cpp
index c836dae40..e178df257 100644
--- a/test/geo_orthomethods.cpp
+++ b/test/geo_orthomethods.cpp
@@ -33,12 +33,16 @@ template<typename Scalar> void orthomethods_3()
VERIFY_IS_MUCH_SMALLER_THAN(v1.dot(v1.cross(v2)), Scalar(1));
VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).dot(v2), Scalar(1));
VERIFY_IS_MUCH_SMALLER_THAN(v2.dot(v1.cross(v2)), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(Vector3::Random()).dot(v1), Scalar(1));
Matrix3 mat3;
mat3 << v0.normalized(),
(v0.cross(v1)).normalized(),
(v0.cross(v1).cross(v0)).normalized();
VERIFY(mat3.isUnitary());
-
+
+ mat3.setRandom();
+ VERIFY_IS_APPROX(v0.cross(mat3*v1), -(mat3*v1).cross(v0));
+ VERIFY_IS_APPROX(v0.cross(mat3.lazyProduct(v1)), -(mat3.lazyProduct(v1)).cross(v0));
// colwise/rowwise cross product
mat3.setRandom();
@@ -47,6 +51,13 @@ template<typename Scalar> void orthomethods_3()
int i = internal::random<int>(0,2);
mcross = mat3.colwise().cross(vec3);
VERIFY_IS_APPROX(mcross.col(i), mat3.col(i).cross(vec3));
+
+ VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(vec3)).diagonal().cwiseAbs().sum(), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(Vector3::Random())).diagonal().cwiseAbs().sum(), Scalar(1));
+
+ VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * mat3.colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * Matrix3::Random().colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1));
+
mcross = mat3.rowwise().cross(vec3);
VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3));
@@ -57,6 +68,7 @@ template<typename Scalar> void orthomethods_3()
v40.w() = v41.w() = v42.w() = 0;
v42.template head<3>() = v40.template head<3>().cross(v41.template head<3>());
VERIFY_IS_APPROX(v40.cross3(v41), v42);
+ VERIFY_IS_MUCH_SMALLER_THAN(v40.cross3(Vector4::Random()).dot(v40), Scalar(1));
// check mixed product
typedef Matrix<RealScalar, 3, 1> RealVector3;
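
The added checks all follow from two identities: u×w is orthogonal to both operands, and the cross product is antisymmetric. In isolation, with plain Geometry-module calls:

    #include <Eigen/Dense>
    #include <Eigen/Geometry>
    #include <cmath>
    using namespace Eigen;

    int main()
    {
      Vector3d u = Vector3d::Random(), w = Vector3d::Random();
      Vector3d c = u.cross(w);
      bool ok = std::abs(c.dot(u)) < 1e-12    // orthogonal to u
             && std::abs(c.dot(w)) < 1e-12    // orthogonal to w
             && c.isApprox(-w.cross(u));      // antisymmetry
      return ok ? 0 : 1;
    }
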
diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp
index 7d9080333..042dd0329 100644
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@@ -98,11 +98,17 @@ template<typename Scalar, int Mode, int Options> void transformations()
Matrix3 matrot1, m;
Scalar a = internal::random<Scalar>(-Scalar(M_PI), Scalar(M_PI));
- Scalar s0 = internal::random<Scalar>();
+ Scalar s0 = internal::random<Scalar>(), s1 = internal::random<Scalar>();
+
+ while(v0.norm() < test_precision<Scalar>()) v0 = Vector3::Random();
+ while(v1.norm() < test_precision<Scalar>()) v1 = Vector3::Random();
VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0);
VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(M_PI), v0.unitOrthogonal()) * v0);
- VERIFY_IS_APPROX(cos(a)*v0.squaredNorm(), v0.dot(AngleAxisx(a, v0.unitOrthogonal()) * v0));
+ if(abs(cos(a)) > test_precision<Scalar>())
+ {
+ VERIFY_IS_APPROX(cos(a)*v0.squaredNorm(), v0.dot(AngleAxisx(a, v0.unitOrthogonal()) * v0));
+ }
m = AngleAxisx(a, v0.normalized()).toRotationMatrix().adjoint();
VERIFY_IS_APPROX(Matrix3::Identity(), m * AngleAxisx(a, v0.normalized()));
VERIFY_IS_APPROX(Matrix3::Identity(), AngleAxisx(a, v0.normalized()) * m);
@@ -123,11 +129,18 @@ template<typename Scalar, int Mode, int Options> void transformations()
// angle-axis conversion
AngleAxisx aa = AngleAxisx(q1);
VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1);
- VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1);
+
+ if(abs(aa.angle()) > NumTraits<Scalar>::dummy_precision())
+ {
+ VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) );
+ }
aa.fromRotationMatrix(aa.toRotationMatrix());
VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1);
- VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1);
+ if(abs(aa.angle()) > NumTraits<Scalar>::dummy_precision())
+ {
+ VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) );
+ }
// AngleAxis
VERIFY_IS_APPROX(AngleAxisx(a,v1.normalized()).toRotationMatrix(),
@@ -347,7 +360,9 @@ template<typename Scalar, int Mode, int Options> void transformations()
// test transform inversion
t0.setIdentity();
t0.translate(v0);
- t0.linear().setRandom();
+ do {
+ t0.linear().setRandom();
+ } while(t0.linear().jacobiSvd().singularValues()(2)<test_precision<Scalar>());
Matrix4 t044 = Matrix4::Zero();
t044(3,3) = 1;
t044.block(0,0,t0.matrix().rows(),4) = t0.matrix();
@@ -394,9 +409,29 @@ template<typename Scalar, int Mode, int Options> void transformations()
Rotation2D<double> r2d1d = r2d1.template cast<double>();
VERIFY_IS_APPROX(r2d1d.template cast<Scalar>(),r2d1);
- t20 = Translation2(v20) * (Rotation2D<Scalar>(s0) * Eigen::Scaling(s0));
- t21 = Translation2(v20) * Rotation2D<Scalar>(s0) * Eigen::Scaling(s0);
+ Rotation2D<Scalar> R0(s0), R1(s1);
+
+ t20 = Translation2(v20) * (R0 * Eigen::Scaling(s0));
+ t21 = Translation2(v20) * R0 * Eigen::Scaling(s0);
VERIFY_IS_APPROX(t20,t21);
+
+ t20 = Translation2(v20) * (R0 * R0.inverse() * Eigen::Scaling(s0));
+ t21 = Translation2(v20) * Eigen::Scaling(s0);
+ VERIFY_IS_APPROX(t20,t21);
+
+ VERIFY_IS_APPROX(s0, (R0.slerp(0, R1)).angle());
+ VERIFY_IS_APPROX(s1, (R0.slerp(1, R1)).angle());
+ VERIFY_IS_APPROX(s0, (R0.slerp(0.5, R0)).angle());
+ VERIFY_IS_APPROX(Scalar(0), (R0.slerp(0.5, R0.inverse())).angle());
+
+ // check basic features
+ {
+ Rotation2D<Scalar> r1; // default ctor
+ r1 = Rotation2D<Scalar>(s0); // copy assignment
+ VERIFY_IS_APPROX(r1.angle(),s0);
+ Rotation2D<Scalar> r2(r1); // copy ctor
+ VERIFY_IS_APPROX(r2.angle(),s0);
+ }
}
template<typename Scalar> void transform_alignment()
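
The new `Rotation2D` checks pin down the endpoint and midpoint behavior of `slerp`. A sketch assuming the `slerp(t, other)` signature exercised above:

    #include <Eigen/Dense>
    #include <Eigen/Geometry>
    #include <cmath>
    using namespace Eigen;

    int main()
    {
      Rotation2Dd r0(0.3), r1(1.2);
      // t=0 yields r0, t=1 yields r1; the midpoint halves the angular
      // difference along the shortest arc (here 0.9 rad, so 0.75).
      Rotation2Dd mid = r0.slerp(0.5, r1);
      return std::abs(mid.angle() - 0.75) < 1e-12 ? 0 : 1;
    }
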
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 8187b088d..1e7b20958 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -68,6 +68,15 @@ template<typename MatrixType> void inverse(const MatrixType& m)
VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1));
m3.computeInverseWithCheck(m4, invertible);
VERIFY( rows==1 ? invertible : !invertible );
+
+ // check with submatrices
+ {
+ Matrix<Scalar, MatrixType::RowsAtCompileTime+1, MatrixType::RowsAtCompileTime+1, MatrixType::Options> m3;
+ m3.setRandom();
+ m3.topLeftCorner(rows,rows) = m1;
+ m2 = m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>().inverse();
+ VERIFY_IS_APPROX( (m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>()), m2.inverse() );
+ }
#endif
// check in-place inversion
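
The submatrix check above makes sure `inverse()` also works on a fixed-size corner of a larger matrix. The same pattern with concrete sizes (a random 3x3 is invertible with probability one, which this sketch takes for granted):

    #include <Eigen/Dense>
    #include <Eigen/LU>
    using namespace Eigen;

    int main()
    {
      Matrix4d m = Matrix4d::Random();
      // A compile-time-sized corner keeps the whole inverse path fixed-size.
      Matrix3d inv = m.topLeftCorner<3,3>().inverse();
      return (m.topLeftCorner<3,3>() * inv).isApprox(Matrix3d::Identity()) ? 0 : 1;
    }
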
diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp
index cd04db5be..f9de6b708 100644
--- a/test/jacobisvd.cpp
+++ b/test/jacobisvd.cpp
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -14,273 +14,47 @@
#include "main.h"
#include <Eigen/SVD>
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPreconditioner>& svd)
-{
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef typename MatrixType::Scalar Scalar;
- typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
- typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
-
- MatrixType sigma = MatrixType::Zero(rows,cols);
- sigma.diagonal() = svd.singularValues().template cast<Scalar>();
- MatrixUType u = svd.matrixU();
- MatrixVType v = svd.matrixV();
-
- VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
- VERIFY_IS_UNITARY(u);
- VERIFY_IS_UNITARY(v);
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const JacobiSVD<MatrixType, QRPreconditioner>& referenceSvd)
-{
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
- Index diagSize = (std::min)(rows, cols);
-
- JacobiSVD<MatrixType, QRPreconditioner> svd(m, computationOptions);
-
- VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
- if(computationOptions & ComputeFullU)
- VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
- if(computationOptions & ComputeThinU)
- VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
- if(computationOptions & ComputeFullV)
- VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
- if(computationOptions & ComputeThinV)
- VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
- typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
-
- RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
- JacobiSVD<MatrixType, QRPreconditioner> svd(m, computationOptions);
-
- if(internal::is_same<RealScalar,double>::value) svd.setThreshold(1e-8);
- else if(internal::is_same<RealScalar,float>::value) svd.setThreshold(1e-4);
-
- SolutionType x = svd.solve(rhs);
-
- RealScalar residual = (m*x-rhs).norm();
- // Check that there is no significantly better solution in the neighborhood of x
- if(!test_isMuchSmallerThan(residual,rhs.norm()))
- {
- // If the residual is very small, then we have an exact solution, so we are already good.
- for(int k=0;k<x.rows();++k)
- {
- SolutionType y(x);
- y.row(k).array() += 2*NumTraits<RealScalar>::epsilon();
- RealScalar residual_y = (m*y-rhs).norm();
- VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
-
- y.row(k) = x.row(k).array() - 2*NumTraits<RealScalar>::epsilon();
- residual_y = (m*y-rhs).norm();
- VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
- }
- }
-
- // evaluate normal equation which works also for least-squares solutions
- if(internal::is_same<RealScalar,double>::value)
- {
- // This test is not stable with single precision.
- // This is probably because squaring m signicantly affects the precision.
- VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs);
- }
-
- // check minimal norm solutions
- {
- // generate a full-rank m x n problem with m<n
- enum {
- RankAtCompileTime2 = ColsAtCompileTime==Dynamic ? Dynamic : (ColsAtCompileTime)/2+1,
- RowsAtCompileTime3 = ColsAtCompileTime==Dynamic ? Dynamic : ColsAtCompileTime+1
- };
- typedef Matrix<Scalar, RankAtCompileTime2, ColsAtCompileTime> MatrixType2;
- typedef Matrix<Scalar, RankAtCompileTime2, 1> RhsType2;
- typedef Matrix<Scalar, ColsAtCompileTime, RankAtCompileTime2> MatrixType2T;
- Index rank = RankAtCompileTime2==Dynamic ? internal::random<Index>(1,cols) : Index(RankAtCompileTime2);
- MatrixType2 m2(rank,cols);
- int guard = 0;
- do {
- m2.setRandom();
- } while(m2.jacobiSvd().setThreshold(test_precision<Scalar>()).rank()!=rank && (++guard)<10);
- VERIFY(guard<10);
- RhsType2 rhs2 = RhsType2::Random(rank);
- // use QR to find a reference minimal norm solution
- HouseholderQR<MatrixType2T> qr(m2.adjoint());
- Matrix<Scalar,Dynamic,1> tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView<Upper>().adjoint().solve(rhs2);
- tmp.conservativeResize(cols);
- tmp.tail(cols-rank).setZero();
- SolutionType x21 = qr.householderQ() * tmp;
- // now check with SVD
- JacobiSVD<MatrixType2, ColPivHouseholderQRPreconditioner> svd2(m2, computationOptions);
- SolutionType x22 = svd2.solve(rhs2);
- VERIFY_IS_APPROX(m2*x21, rhs2);
- VERIFY_IS_APPROX(m2*x22, rhs2);
- VERIFY_IS_APPROX(x21, x22);
-
- // Now check with a rank deficient matrix
- typedef Matrix<Scalar, RowsAtCompileTime3, ColsAtCompileTime> MatrixType3;
- typedef Matrix<Scalar, RowsAtCompileTime3, 1> RhsType3;
- Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random<Index>(rank+1,2*cols) : Index(RowsAtCompileTime3);
- Matrix<Scalar,RowsAtCompileTime3,Dynamic> C = Matrix<Scalar,RowsAtCompileTime3,Dynamic>::Random(rows3,rank);
- MatrixType3 m3 = C * m2;
- RhsType3 rhs3 = C * rhs2;
- JacobiSVD<MatrixType3, ColPivHouseholderQRPreconditioner> svd3(m3, computationOptions);
- SolutionType x3 = svd3.solve(rhs3);
- VERIFY_IS_APPROX(m3*x3, rhs3);
- VERIFY_IS_APPROX(m3*x21, rhs3);
- VERIFY_IS_APPROX(m2*x3, rhs2);
-
- VERIFY_IS_APPROX(x21, x3);
- }
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_test_all_computation_options(const MatrixType& m)
-{
- if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
- return;
- JacobiSVD<MatrixType, QRPreconditioner> fullSvd(m, ComputeFullU|ComputeFullV);
- CALL_SUBTEST(( jacobisvd_check_full(m, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeFullU | ComputeFullV) ));
-
- #if defined __INTEL_COMPILER
- // remark #111: statement is unreachable
- #pragma warning disable 111
- #endif
- if(QRPreconditioner == FullPivHouseholderQRPreconditioner)
- return;
-
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullU, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullV, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, 0, fullSvd) ));
-
- if (MatrixType::ColsAtCompileTime == Dynamic) {
- // thin U/V are only available with dynamic number of columns
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinV, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU , fullSvd) ));
- CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) ));
- CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeFullU | ComputeThinV) ));
- CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeThinU | ComputeFullV) ));
- CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeThinU | ComputeThinV) ));
-
- // test reconstruction
- typedef typename MatrixType::Index Index;
- Index diagSize = (std::min)(m.rows(), m.cols());
- JacobiSVD<MatrixType, QRPreconditioner> svd(m, ComputeThinU | ComputeThinV);
- VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
- }
-}
+#define SVD_DEFAULT(M) JacobiSVD<M>
+#define SVD_FOR_MIN_NORM(M) JacobiSVD<M,ColPivHouseholderQRPreconditioner>
+#include "svd_common.h"
+// Check all variants of JacobiSVD
template<typename MatrixType>
void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
{
MatrixType m = a;
if(pickrandom)
- {
- typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
- typedef typename MatrixType::Index Index;
- Index diagSize = (std::min)(a.rows(), a.cols());
- RealScalar s = std::numeric_limits<RealScalar>::max_exponent10/4;
- s = internal::random<RealScalar>(1,s);
- Matrix<RealScalar,Dynamic,1> d = Matrix<RealScalar,Dynamic,1>::Random(diagSize);
- for(Index k=0; k<diagSize; ++k)
- d(k) = d(k)*std::pow(RealScalar(10),internal::random<RealScalar>(-s,s));
- m = Matrix<Scalar,Dynamic,Dynamic>::Random(a.rows(),diagSize) * d.asDiagonal() * Matrix<Scalar,Dynamic,Dynamic>::Random(diagSize,a.cols());
- // cancel some coeffs
- Index n = internal::random<Index>(0,m.size()-1);
- for(Index i=0; i<n; ++i)
- m(internal::random<Index>(0,m.rows()-1), internal::random<Index>(0,m.cols()-1)) = Scalar(0);
- }
+ svd_fill_random(m);
- CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, FullPivHouseholderQRPreconditioner>(m) ));
- CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, ColPivHouseholderQRPreconditioner>(m) ));
- CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, HouseholderQRPreconditioner>(m) ));
- CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, NoQRPreconditioner>(m) ));
+ CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> >(m, true) )); // check full only
+ CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner> >(m, false) ));
+ CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, HouseholderQRPreconditioner> >(m, false) ));
+ if(m.rows()==m.cols())
+ CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, NoQRPreconditioner> >(m, false) ));
}
template<typename MatrixType> void jacobisvd_verify_assert(const MatrixType& m)
{
- typedef typename MatrixType::Scalar Scalar;
+ svd_verify_assert<JacobiSVD<MatrixType> >(m);
typedef typename MatrixType::Index Index;
Index rows = m.rows();
Index cols = m.cols();
enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime
};
- typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
-
- RhsType rhs(rows);
-
- JacobiSVD<MatrixType> svd;
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.singularValues())
- VERIFY_RAISES_ASSERT(svd.matrixV())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
MatrixType a = MatrixType::Zero(rows, cols);
a.setZero();
- svd.compute(a, 0);
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.matrixV())
- svd.singularValues();
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
if (ColsAtCompileTime == Dynamic)
{
- svd.compute(a, ComputeThinU);
- svd.matrixU();
- VERIFY_RAISES_ASSERT(svd.matrixV())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
- svd.compute(a, ComputeThinV);
- svd.matrixV();
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> svd_fullqr;
VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV))
VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV))
VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV))
}
- else
- {
- VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
- VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
- }
}
template<typename MatrixType>
@@ -296,165 +70,17 @@ void jacobisvd_method()
VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m);
}
-// work around stupid msvc error when constructing at compile time an expression that involves
-// a division by zero, even if the numeric type has floating point
-template<typename Scalar>
-EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
-
-// workaround aggressive optimization in ICC
-template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
-
-template<typename MatrixType>
-void jacobisvd_inf_nan()
-{
- // all this function does is verify we don't iterate infinitely on nan/inf values
-
- JacobiSVD<MatrixType> svd;
- typedef typename MatrixType::Scalar Scalar;
- Scalar some_inf = Scalar(1) / zero<Scalar>();
- VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
- svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
-
- Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
- VERIFY(nan != nan);
- svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV);
-
- MatrixType m = MatrixType::Zero(10,10);
- m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
- svd.compute(m, ComputeFullU | ComputeFullV);
-
- m = MatrixType::Zero(10,10);
- m(internal::random<int>(0,9), internal::random<int>(0,9)) = nan;
- svd.compute(m, ComputeFullU | ComputeFullV);
-
- // regression test for bug 791
- m.resize(3,3);
- m << 0, 2*NumTraits<Scalar>::epsilon(), 0.5,
- 0, -0.5, 0,
- nan, 0, 0;
- svd.compute(m, ComputeFullU | ComputeFullV);
-
- m.resize(4,4);
- m << 1, 0, 0, 0,
- 0, 3, 1, 2e-308,
- 1, 0, 1, nan,
- 0, nan, nan, 0;
- svd.compute(m, ComputeFullU | ComputeFullV);
-}
-
-// Regression test for bug 286: JacobiSVD loops indefinitely with some
-// matrices containing denormal numbers.
-void jacobisvd_underoverflow()
-{
-#if defined __INTEL_COMPILER
-// shut up warning #239: floating point underflow
-#pragma warning push
-#pragma warning disable 239
-#endif
- Matrix2d M;
- M << -7.90884e-313, -4.94e-324,
- 0, 5.60844e-313;
- JacobiSVD<Matrix2d> svd;
- svd.compute(M,ComputeFullU|ComputeFullV);
- jacobisvd_check_full(M,svd);
-
- VectorXd value_set(9);
- value_set << 0, 1, -1, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -4.94e-223, 4.94e-223;
- Array4i id(0,0,0,0);
- int k = 0;
- do
- {
- M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3));
- svd.compute(M,ComputeFullU|ComputeFullV);
- jacobisvd_check_full(M,svd);
-
- id(k)++;
- if(id(k)>=value_set.size())
- {
- while(k<3 && id(k)>=value_set.size()) id(++k)++;
- id.head(k).setZero();
- k=0;
- }
-
- } while((id<int(value_set.size())).all());
-
-#if defined __INTEL_COMPILER
-#pragma warning pop
-#endif
-
- // Check for overflow:
- Matrix3d M3;
- M3 << 4.4331978442502944e+307, -5.8585363752028680e+307, 6.4527017443412964e+307,
- 3.7841695601406358e+307, 2.4331702789740617e+306, -3.5235707140272905e+307,
- -8.7190887618028355e+307, -7.3453213709232193e+307, -2.4367363684472105e+307;
-
- JacobiSVD<Matrix3d> svd3;
- svd3.compute(M3,ComputeFullU|ComputeFullV); // just check we don't loop indefinitely
- jacobisvd_check_full(M3,svd3);
-}
-
-void jacobisvd_preallocate()
-{
- Vector3f v(3.f, 2.f, 1.f);
- MatrixXf m = v.asDiagonal();
-
- internal::set_is_malloc_allowed(false);
- VERIFY_RAISES_ASSERT(VectorXf tmp(10);)
- JacobiSVD<MatrixXf> svd;
- internal::set_is_malloc_allowed(true);
- svd.compute(m);
- VERIFY_IS_APPROX(svd.singularValues(), v);
-
- JacobiSVD<MatrixXf> svd2(3,3);
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
- VERIFY_IS_APPROX(svd2.singularValues(), v);
- VERIFY_RAISES_ASSERT(svd2.matrixU());
- VERIFY_RAISES_ASSERT(svd2.matrixV());
- svd2.compute(m, ComputeFullU | ComputeFullV);
- VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
- VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
-
- JacobiSVD<MatrixXf> svd3(3,3,ComputeFullU|ComputeFullV);
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
- VERIFY_IS_APPROX(svd2.singularValues(), v);
- VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
- VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
- internal::set_is_malloc_allowed(false);
- svd2.compute(m, ComputeFullU|ComputeFullV);
- internal::set_is_malloc_allowed(true);
-}
-
void test_jacobisvd()
{
CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) ));
CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) ));
CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) ));
CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) ));
+
+ CALL_SUBTEST_11(svd_all_trivial_2x2(jacobisvd<Matrix2cd>));
+ CALL_SUBTEST_12(svd_all_trivial_2x2(jacobisvd<Matrix2d>));
for(int i = 0; i < g_repeat; i++) {
- Matrix2cd m;
- m << 0, 1,
- 0, 1;
- CALL_SUBTEST_1(( jacobisvd(m, false) ));
- m << 1, 0,
- 1, 0;
- CALL_SUBTEST_1(( jacobisvd(m, false) ));
-
- Matrix2d n;
- n << 0, 0,
- 0, 0;
- CALL_SUBTEST_2(( jacobisvd(n, false) ));
- n << 0, 0,
- 0, 1;
- CALL_SUBTEST_2(( jacobisvd(n, false) ));
-
CALL_SUBTEST_3(( jacobisvd<Matrix3f>() ));
CALL_SUBTEST_4(( jacobisvd<Matrix4d>() ));
CALL_SUBTEST_5(( jacobisvd<Matrix<float,3,5> >() ));
@@ -473,8 +99,8 @@ void test_jacobisvd()
(void) c;
// Test on inf/nan matrix
- CALL_SUBTEST_7( jacobisvd_inf_nan<MatrixXf>() );
- CALL_SUBTEST_10( jacobisvd_inf_nan<MatrixXd>() );
+ CALL_SUBTEST_7( (svd_inf_nan<JacobiSVD<MatrixXf>, MatrixXf>()) );
+ CALL_SUBTEST_10( (svd_inf_nan<JacobiSVD<MatrixXd>, MatrixXd>()) );
}
CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
@@ -488,8 +114,7 @@ void test_jacobisvd()
CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
// Check that preallocation avoids subsequent mallocs
- CALL_SUBTEST_9( jacobisvd_preallocate() );
+ CALL_SUBTEST_9( svd_preallocate() );
- // Regression check for bug 286
- CALL_SUBTEST_2( jacobisvd_underoverflow() );
+ CALL_SUBTEST_2( svd_underoverflow() );
}
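
Most of the logic deleted here moves into the shared `svd_common.h` harness; what remains user-visible is the standard `JacobiSVD` workflow. A minimal sketch of that workflow:

    #include <Eigen/Dense>
    #include <Eigen/SVD>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(7, 5);
      // Thin U/V suffice for least-squares solving; full unitaries are
      // only needed when square orthogonal factors are required.
      JacobiSVD<MatrixXd> svd(m, ComputeThinU | ComputeThinV);
      VectorXd rhs = VectorXd::Random(7);
      VectorXd x = svd.solve(rhs);            // minimizes |m*x - rhs|
      MatrixXd rec = svd.matrixU() * svd.singularValues().asDiagonal()
                   * svd.matrixV().adjoint();
      return rec.isApprox(m) ? 0 : 1;
    }
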
diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp
index b627915ce..8e3cc9a86 100644
--- a/test/linearstructure.cpp
+++ b/test/linearstructure.cpp
@@ -9,7 +9,6 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
static bool g_called;
-
#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; }
#include "main.h"
@@ -93,6 +92,8 @@ template<typename MatrixType> void real_complex(DenseIndex rows = MatrixType::Ro
void test_linearstructure()
{
+ g_called = true;
+ VERIFY(g_called); // avoid `unneeded-internal-declaration` warning.
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( linearStructure(Matrix<float, 1, 1>()) );
CALL_SUBTEST_2( linearStructure(Matrix2f()) );
@@ -107,4 +108,19 @@ void test_linearstructure()
CALL_SUBTEST_10( real_complex<Matrix4cd>() );
CALL_SUBTEST_10( real_complex<MatrixXcf>(10,10) );
}
+
+#ifdef EIGEN_TEST_PART_4
+ {
+ // make sure that /=scalar and /scalar do not overflow
+ // rationale: 1.0/4.9e-320 overflows, but m/4.9e-320 should not
+ Matrix4d m2, m3;
+ m3 = m2 = Matrix4d::Random()*1e-20;
+ m2 = m2 / 4.9e-320;
+ VERIFY_IS_APPROX(m2.cwiseQuotient(m2), Matrix4d::Ones());
+ m3 /= 4.9e-320;
+ VERIFY_IS_APPROX(m3.cwiseQuotient(m3), Matrix4d::Ones());
+
+
+ }
+#endif
}
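
The point of the part-4 block is that `m / s` must not be rewritten as `m * (1/s)` when `1/s` overflows. Stand-alone, with the same magic constants as the test:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      Matrix4d m = Matrix4d::Random() * 1e-20;
      // 1.0/4.9e-320 overflows to +inf, so a reciprocal-multiply shortcut
      // would wipe out every coefficient; a true coefficient-wise
      // division keeps the quotient finite.
      Matrix4d q = m / 4.9e-320;
      return q.allFinite() ? 0 : 1;
    }
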
diff --git a/test/main.h b/test/main.h
index 9cb41c828..579cd2131 100644
--- a/test/main.h
+++ b/test/main.h
@@ -61,7 +61,7 @@
#endif
// shuts down ICC's remark #593: variable "XXX" was set but never used
-#define TEST_SET_BUT_UNUSED_VARIABLE(X) X = X + 0;
+#define TEST_SET_BUT_UNUSED_VARIABLE(X) EIGEN_UNUSED_VARIABLE(X)
// the following file is automatically generated by cmake
#include "split_test_helper.h"
@@ -76,7 +76,7 @@
#endif
// bounds integer values for AltiVec
-#ifdef __ALTIVEC__
+#if defined(__ALTIVEC__) || defined(__VSX__)
#define EIGEN_MAKING_DOCS
#endif
@@ -94,6 +94,9 @@ namespace Eigen
static bool g_has_set_repeat, g_has_set_seed;
}
+#define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl
+// #define TRACK while()
+
#define EI_PP_MAKE_STRING2(S) #S
#define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S)
@@ -312,13 +315,7 @@ inline bool test_isApproxOrLessThan(const long double& a, const long double& b)
template<typename Type1, typename Type2>
inline bool test_isApprox(const Type1& a, const Type2& b)
{
-#ifdef EIGEN_TEST_EVALUATORS
- typename internal::eval<Type1>::type a_eval(a);
- typename internal::eval<Type2>::type b_eval(b);
- return a_eval.isApprox(b_eval, test_precision<typename Type1::Scalar>());
-#else
return a.isApprox(b, test_precision<typename Type1::Scalar>());
-#endif
}
// The idea behind this function is to compare the two scalars a and b where
@@ -436,6 +433,26 @@ void randomPermutationVector(PermutationVectorType& v, typename PermutationVecto
}
}
+template<typename T> bool isNotNaN(const T& x)
+{
+ return x==x;
+}
+
+template<typename T> bool isNaN(const T& x)
+{
+ return x!=x;
+}
+
+template<typename T> bool isInf(const T& x)
+{
+ return x > NumTraits<T>::highest();
+}
+
+template<typename T> bool isMinusInf(const T& x)
+{
+ return x < NumTraits<T>::lowest();
+}
+
} // end namespace Eigen
template<typename T> struct GetDifferentType;
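
The four helpers added to main.h rely on IEEE-754 semantics only: NaN is the sole value that compares unequal to itself, and infinities fall outside the [lowest, highest] finite range. The same idea in plain C++ (hypothetical mirror names, standard `<limits>` only):

    #include <limits>

    template<typename T> bool is_nan(T x) { return x != x; }
    template<typename T> bool is_inf(T x) { return x > std::numeric_limits<T>::max(); }

    int main()
    {
      double nan = std::numeric_limits<double>::quiet_NaN();
      double inf = std::numeric_limits<double>::infinity();
      // quiet_NaN fails self-equality; +inf exceeds the largest finite value.
      return (is_nan(nan) && is_inf(inf) && !is_nan(1.0) && !is_inf(1.0)) ? 0 : 1;
    }
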
diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp
index 1e0e2d4c1..048f7255a 100644
--- a/test/mixingtypes.cpp
+++ b/test/mixingtypes.cpp
@@ -53,10 +53,11 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
mf+mf;
VERIFY_RAISES_ASSERT(mf+md);
VERIFY_RAISES_ASSERT(mf+mcf);
- VERIFY_RAISES_ASSERT(vf=vd);
- VERIFY_RAISES_ASSERT(vf+=vd);
- VERIFY_RAISES_ASSERT(mcd=md);
-
+ // the following do not even compile since the introduction of evaluators
+// VERIFY_RAISES_ASSERT(vf=vd);
+// VERIFY_RAISES_ASSERT(vf+=vd);
+// VERIFY_RAISES_ASSERT(mcd=md);
+
// check scalar products
VERIFY_IS_APPROX(vcf * sf , vcf * complex<float>(sf));
VERIFY_IS_APPROX(sd * vcd, complex<double>(sd) * vcd);
diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp
index 1e8523283..6e772c70f 100644
--- a/test/nesting_ops.cpp
+++ b/test/nesting_ops.cpp
@@ -11,7 +11,7 @@
template <typename MatrixType> void run_nesting_ops(const MatrixType& _m)
{
- typename MatrixType::Nested m(_m);
+ typename internal::nested_eval<MatrixType,2>::type m(_m);
// Make really sure that we are in debug mode!
VERIFY_RAISES_ASSERT(eigen_assert(false));
diff --git a/test/nomalloc.cpp b/test/nomalloc.cpp
index cbd02dd21..306664210 100644
--- a/test/nomalloc.cpp
+++ b/test/nomalloc.cpp
@@ -21,7 +21,7 @@
// discard stack allocation as that too bypasses malloc
#define EIGEN_STACK_ALLOCATION_LIMIT 0
// any heap allocation will raise an assert
-#define EIGEN_NO_MALLOC
+#define EIGEN_RUNTIME_NO_MALLOC
#include "main.h"
#include <Eigen/Cholesky>
@@ -165,8 +165,62 @@ void ctms_decompositions()
Eigen::JacobiSVD<Matrix> jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV);
}
+void test_zerosized() {
+ // default constructors:
+ Eigen::MatrixXd A;
+ Eigen::VectorXd v;
+ // explicit zero-sized:
+ Eigen::ArrayXXd A0(0,0);
+ Eigen::ArrayXd v0(0);
+
+ // assigning empty objects to each other:
+ A=A0;
+ v=v0;
+}
+
+template<typename MatrixType> void test_reference(const MatrixType& m) {
+ typedef typename MatrixType::Scalar Scalar;
+ enum { Flag = MatrixType::IsRowMajor ? Eigen::RowMajor : Eigen::ColMajor};
+ enum { TransposeFlag = !MatrixType::IsRowMajor ? Eigen::RowMajor : Eigen::ColMajor};
+ typename MatrixType::Index rows = m.rows(), cols=m.cols();
+ typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Flag > MatrixX;
+ typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, TransposeFlag> MatrixXT;
+ // Dynamic reference:
+ typedef Eigen::Ref<const MatrixX > Ref;
+ typedef Eigen::Ref<const MatrixXT > RefT;
+
+ Ref r1(m);
+ Ref r2(m.block(rows/3, cols/4, rows/2, cols/2));
+ RefT r3(m.transpose());
+ RefT r4(m.topLeftCorner(rows/2, cols/2).transpose());
+
+ VERIFY_RAISES_ASSERT(RefT r5(m));
+ VERIFY_RAISES_ASSERT(Ref r6(m.transpose()));
+ VERIFY_RAISES_ASSERT(Ref r7(Scalar(2) * m));
+
+ // Copy constructors shall also never malloc
+ Ref r8 = r1;
+ RefT r9 = r3;
+
+ // Initializing from a compatible Ref shall also never malloc
+ Eigen::Ref<const MatrixX, Unaligned, Stride<Dynamic, Dynamic> > r10=r8, r11=m;
+
+ // Initializing from an incompatible Ref will malloc:
+ typedef Eigen::Ref<const MatrixX, Aligned> RefAligned;
+ VERIFY_RAISES_ASSERT(RefAligned r12=r10);
+ VERIFY_RAISES_ASSERT(Ref r13=r10); // r10 has more dynamic strides
+
+}
+
void test_nomalloc()
{
+ // create some dynamic objects
+ Eigen::MatrixXd M1 = MatrixXd::Random(3,3);
+ Ref<const MatrixXd> R1 = 2.0*M1; // Ref requires temporary
+
+ // from here on prohibit malloc:
+ Eigen::internal::set_is_malloc_allowed(false);
+
// check that our operator new is indeed called:
VERIFY_RAISES_ASSERT(MatrixXd dummy(MatrixXd::Random(3,3)));
CALL_SUBTEST_1(nomalloc(Matrix<float, 1, 1>()) );
@@ -176,4 +230,9 @@ void test_nomalloc()
// Check decomposition modules with dynamic matrices that have a known compile-time max size (ctms)
CALL_SUBTEST_4(ctms_decompositions<float>());
+ CALL_SUBTEST_5(test_zerosized());
+
+ CALL_SUBTEST_6(test_reference(Matrix<float,32,32>()));
+ CALL_SUBTEST_7(test_reference(R1));
+ CALL_SUBTEST_8(Ref<MatrixXd> R2 = M1.topRows<2>(); test_reference(R2));
}
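
Switching from `EIGEN_NO_MALLOC` to `EIGEN_RUNTIME_NO_MALLOC` lets the test allocate its fixtures first and only then arm the allocation trap. A sketch of that pattern; note that `set_is_malloc_allowed` is an internal testing hook, available only when the macro is defined before any Eigen header:

    #define EIGEN_RUNTIME_NO_MALLOC   // must precede all Eigen includes
    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd big = MatrixXd::Random(100,100); // heap allocation, still allowed
      internal::set_is_malloc_allowed(false);
      Matrix4d a = Matrix4d::Random();          // fixed-size: stack only
      Matrix4d b = a * a;                       // no heap traffic either
      internal::set_is_malloc_allowed(true);
      return (b.allFinite() && big.rows() == 100) ? 0 : 1;
    }
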
diff --git a/test/nullary.cpp b/test/nullary.cpp
index 5408d88b2..fbc721a1a 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -80,7 +80,9 @@ void testVectorType(const VectorType& base)
Matrix<Scalar,1,Dynamic> col_vector(size);
row_vector.setLinSpaced(size,low,high);
col_vector.setLinSpaced(size,low,high);
- VERIFY( row_vector.isApprox(col_vector.transpose(), NumTraits<Scalar>::epsilon()));
+ // when extended precision is used (e.g., the x87 FPU) the relative error might exceed 1 bit
+ // when computing the squared sum in isApprox, hence the 2x factor.
+ VERIFY( row_vector.isApprox(col_vector.transpose(), Scalar(2)*NumTraits<Scalar>::epsilon()));
Matrix<Scalar,Dynamic,1> size_changer(size+50);
size_changer.setLinSpaced(size,low,high);
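
The loosened tolerance only matters on targets that keep intermediates in extended precision; the sequence itself is identical for row and column vectors. In isolation:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      VectorXd    col(5); col.setLinSpaced(5, 0.0, 1.0);  // 0, .25, .5, .75, 1
      RowVectorXd row(5); row.setLinSpaced(5, 0.0, 1.0);
      // Compare with the same 2x-epsilon slack the test settles on.
      return col.transpose().isApprox(row, 2*NumTraits<double>::epsilon()) ? 0 : 1;
    }
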
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index af9be89ca..49f601907 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -156,7 +156,7 @@ template<typename Scalar> void packetmath()
CHECK_CWISE2(REF_ADD, internal::padd);
CHECK_CWISE2(REF_SUB, internal::psub);
CHECK_CWISE2(REF_MUL, internal::pmul);
- #ifndef EIGEN_VECTORIZE_ALTIVEC
+ #if !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
if (!internal::is_same<Scalar,int>::value)
CHECK_CWISE2(REF_DIV, internal::pdiv);
#endif
@@ -313,6 +313,12 @@ template<typename Scalar> void packetmath_real()
data2[i] = internal::random<Scalar>(-87,88);
}
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasExp, std::exp, internal::pexp);
+ {
+ data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
+ packet_helper<internal::packet_traits<Scalar>::HasExp,Packet> h;
+ h.store(data2, internal::pexp(h.load(data1)));
+ VERIFY(isNaN(data2[0]));
+ }
for (int i=0; i<size; ++i)
{
@@ -321,8 +327,22 @@ template<typename Scalar> void packetmath_real()
}
if(internal::random<float>(0,1)<0.1)
data1[internal::random<int>(0, PacketSize)] = 0;
- CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasSqrt, std::sqrt, internal::psqrt);
+ CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
+ {
+ data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
+ packet_helper<internal::packet_traits<Scalar>::HasLog,Packet> h;
+ h.store(data2, internal::plog(h.load(data1)));
+ VERIFY(isNaN(data2[0]));
+ data1[0] = -1.0f;
+ h.store(data2, internal::plog(h.load(data1)));
+ VERIFY(isNaN(data2[0]));
+#if !EIGEN_FAST_MATH
+ h.store(data2, internal::psqrt(h.load(data1)));
+ VERIFY(isNaN(data2[0]));
+ VERIFY(isNaN(data2[1]));
+#endif
+ }
}
template<typename Scalar> void packetmath_notcomplex()
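
The new blocks assert that the vectorized `pexp`/`plog`/`psqrt` kernels propagate NaN the way the scalar routines do. The scalar reference behavior they are compared against:

    #include <cmath>
    #include <limits>

    int main()
    {
      double nan = std::numeric_limits<double>::quiet_NaN();
      // exp(NaN) and log(NaN) are NaN; log and sqrt of a negative
      // number are NaN as well.
      bool ok = std::isnan(std::exp(nan))
             && std::isnan(std::log(nan))
             && std::isnan(std::log(-1.0))
             && std::isnan(std::sqrt(-1.0));
      return ok ? 0 : 1;
    }
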
diff --git a/test/product_mmtr.cpp b/test/product_mmtr.cpp
index 7d6746800..92e6b668f 100644
--- a/test/product_mmtr.cpp
+++ b/test/product_mmtr.cpp
@@ -13,7 +13,8 @@
ref2 = ref1 = DEST; \
DEST.template triangularView<TRI>() OP; \
ref1 OP; \
- ref2.template triangularView<TRI>() = ref1; \
+ ref2.template triangularView<TRI>() \
+ = ref1.template triangularView<TRI>(); \
VERIFY_IS_APPROX(DEST,ref2); \
}
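
The macro fix copies only the selected triangle of the reference result, since a product with a triangular destination leaves the opposite triangle of `DEST` untouched. That assignment semantic in isolation:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      Matrix3d src = Matrix3d::Random();
      Matrix3d dst = Matrix3d::Zero();
      // Writing through triangularView touches only the chosen triangle;
      // the strictly-lower part of dst keeps its previous (zero) values.
      dst.triangularView<Upper>() = src;
      bool lower_untouched = dst(1,0) == 0.0 && dst(2,0) == 0.0 && dst(2,1) == 0.0;
      return lower_untouched ? 0 : 1;
    }
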
diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp
index 3a9df618b..805cc8939 100644
--- a/test/product_notemporary.cpp
+++ b/test/product_notemporary.cpp
@@ -113,8 +113,7 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose() * m3).diagonal().array().abs().sum(), 0 );
// Zero temporaries for ... CoeffBasedProductMode
- // - does not work with GCC because of the <..>, we'ld need variadic macros ...
- //VERIFY_EVALUATION_COUNT( m3.col(0).head<5>() * m3.col(0).transpose() + m3.col(0).head<5>() * m3.col(0).transpose(), 0 );
+ VERIFY_EVALUATION_COUNT( m3.col(0).template head<5>() * m3.col(0).transpose() + m3.col(0).template head<5>() * m3.col(0).transpose(), 0 );
// Check matrix * vectors
VERIFY_EVALUATION_COUNT( cvres.noalias() = m1 * cv1, 0 );
diff --git a/test/product_small.cpp b/test/product_small.cpp
index 8b132abb6..091955a0f 100644
--- a/test/product_small.cpp
+++ b/test/product_small.cpp
@@ -9,6 +9,7 @@
#define EIGEN_NO_STATIC_ASSERT
#include "product.h"
+#include <Eigen/LU>
// regression test for bug 447
void product1x1()
@@ -46,5 +47,14 @@ void test_product_small()
Vector3f v = Vector3f::Random();
VERIFY_IS_APPROX( (v * v.transpose()) * v, (v * v.transpose()).eval() * v);
}
+
+ {
+ // regression test for pull-request #93
+ Eigen::Matrix<double, 1, 1> A; A.setRandom();
+ Eigen::Matrix<double, 18, 1> B; B.setRandom();
+ Eigen::Matrix<double, 1, 18> C; C.setRandom();
+ VERIFY_IS_APPROX(B * A.inverse(), B * A.inverse()[0]);
+ VERIFY_IS_APPROX(A.inverse() * C, A.inverse()[0] * C);
+ }
#endif
}
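
The pull-request #93 regression checks that a 1x1 inverse can act either as a matrix factor or as a scalar. With the expression evaluated into a plain 1x1 matrix first, the equivalence reads:

    #include <Eigen/Dense>
    #include <Eigen/LU>
    using namespace Eigen;

    int main()
    {
      Matrix<double,1,1> A; A << 2.0;
      Matrix<double,3,1> B; B << 1, 2, 3;
      Matrix<double,1,1> Ainv = A.inverse();
      Matrix<double,3,1> p1 = B * Ainv;        // 3x1 times 1x1 matrix
      Matrix<double,3,1> p2 = B * Ainv(0,0);   // 3x1 times scalar
      return p1.isApprox(p2) ? 0 : 1;
    }
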
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index 511f2473f..601773404 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -40,7 +40,11 @@ template<typename MatrixType> void qr()
MatrixType c = qr.matrixQ() * r * qr.colsPermutation().inverse();
VERIFY_IS_APPROX(m1, c);
-
+
+ // stress the ReturnByValue mechanism
+ MatrixType tmp;
+ VERIFY_IS_APPROX(tmp.noalias() = qr.matrixQ() * r, (qr.matrixQ() * r).eval());
+
MatrixType m2 = MatrixType::Random(cols,cols2);
MatrixType m3 = m1*m2;
m2 = MatrixType::Random(cols,cols2);
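
The added lines stress assigning a `matrixQ()`-style ReturnByValue product through `noalias()`. A sketch of the two evaluation paths that must agree:

    #include <Eigen/Dense>
    #include <Eigen/QR>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(5,5);
      FullPivHouseholderQR<MatrixXd> qr(m);
      MatrixXd r = qr.matrixQR().triangularView<Upper>();
      MatrixXd tmp;
      tmp.noalias() = qr.matrixQ() * r;        // assigned without an aliasing check
      return tmp.isApprox((qr.matrixQ() * r).eval()) ? 0 : 1;
    }
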
diff --git a/test/ref.cpp b/test/ref.cpp
index d91e3b54c..b9470213c 100644
--- a/test/ref.cpp
+++ b/test/ref.cpp
@@ -182,15 +182,15 @@ void call_ref()
VERIFY_EVALUATION_COUNT( call_ref_1(a,a), 0);
VERIFY_EVALUATION_COUNT( call_ref_1(b,b.transpose()), 0);
-// call_ref_1(ac); // does not compile because ac is const
+// call_ref_1(ac,ac); // does not compile because ac is const
VERIFY_EVALUATION_COUNT( call_ref_1(ab,ab), 0);
VERIFY_EVALUATION_COUNT( call_ref_1(a.head(4),a.head(4)), 0);
VERIFY_EVALUATION_COUNT( call_ref_1(abc,abc), 0);
VERIFY_EVALUATION_COUNT( call_ref_1(A.col(3),A.col(3)), 0);
-// call_ref_1(A.row(3)); // does not compile because innerstride!=1
+// call_ref_1(A.row(3),A.row(3)); // does not compile because innerstride!=1
VERIFY_EVALUATION_COUNT( call_ref_3(A.row(3),A.row(3).transpose()), 0);
VERIFY_EVALUATION_COUNT( call_ref_4(A.row(3),A.row(3).transpose()), 0);
-// call_ref_1(a+a); // does not compile for obvious reason
+// call_ref_1(a+a, a+a); // does not compile for obvious reason
MatrixXf tmp = A*A.col(1);
VERIFY_EVALUATION_COUNT( call_ref_2(A*A.col(1), tmp), 1); // evaluated into a temp
@@ -211,7 +211,7 @@ void call_ref()
VERIFY_EVALUATION_COUNT( call_ref_5(a,a), 0);
VERIFY_EVALUATION_COUNT( call_ref_5(a.head(3),a.head(3)), 0);
VERIFY_EVALUATION_COUNT( call_ref_5(A,A), 0);
-// call_ref_5(A.transpose()); // does not compile
+// call_ref_5(A.transpose(),A.transpose()); // does not compile because storage order does not match
VERIFY_EVALUATION_COUNT( call_ref_5(A.block(1,1,2,2),A.block(1,1,2,2)), 0);
VERIFY_EVALUATION_COUNT( call_ref_5(b,b), 0); // storage order do not match, but this is a degenerate case that should work
VERIFY_EVALUATION_COUNT( call_ref_5(a.row(3),a.row(3)), 0);
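
The updated comments spell out why each commented-out call cannot compile. The underlying `Ref` contract, in a small sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    // A writable Ref binds only to directly mappable, non-const storage...
    void scale_inplace(Ref<MatrixXf> m) { m *= 2.f; }
    // ...while a const Ref silently evaluates non-mappable expressions
    // into a hidden temporary instead of failing to compile.
    float first_coeff(const Ref<const MatrixXf>& m) { return m(0,0); }

    int main()
    {
      MatrixXf A = MatrixXf::Random(4,4);
      scale_inplace(A);          // ok: plain storage, modified in place
      first_coeff(2.f * A);      // ok, but copies into a temporary
      // scale_inplace(2.f * A); // would not compile: expressions are read-only
      return 0;
    }
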
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 4c9b9111e..097959c84 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -18,6 +18,9 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
const Index rows = ref.rows();
const Index cols = ref.cols();
+ const Index inner = ref.innerSize();
+ const Index outer = ref.outerSize();
+
typedef typename SparseMatrixType::Scalar Scalar;
enum { Flags = SparseMatrixType::Flags };
@@ -36,23 +39,22 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
std::vector<Vector2> nonzeroCoords;
initSparse<Scalar>(density, refMat, m, 0, &zeroCoords, &nonzeroCoords);
- if (zeroCoords.size()==0 || nonzeroCoords.size()==0)
- return;
-
// test coeff and coeffRef
- for (int i=0; i<(int)zeroCoords.size(); ++i)
+ for (std::size_t i=0; i<zeroCoords.size(); ++i)
{
VERIFY_IS_MUCH_SMALLER_THAN( m.coeff(zeroCoords[i].x(),zeroCoords[i].y()), eps );
if(internal::is_same<SparseMatrixType,SparseMatrix<Scalar,Flags> >::value)
- VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[0].x(),zeroCoords[0].y()) = 5 );
+ VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[i].x(),zeroCoords[i].y()) = 5 );
}
VERIFY_IS_APPROX(m, refMat);
- m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
- refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
+ if(!nonzeroCoords.empty()) {
+ m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
+ refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
+ }
VERIFY_IS_APPROX(m, refMat);
- /*
+
// test InnerIterators and Block expressions
for (int t=0; t<10; ++t)
{
@@ -61,23 +63,25 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
int w = internal::random<int>(1,cols-j-1);
int h = internal::random<int>(1,rows-i-1);
- // VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
+ VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
for(int c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
for(int r=0; r<h; r++)
{
- // VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
+ // FIXME col().coeff() not implemented yet
+// VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
}
}
- // for(int r=0; r<h; r++)
- // {
- // VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
- // for(int c=0; c<w; c++)
- // {
- // VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
- // }
- // }
+ for(int r=0; r<h; r++)
+ {
+ VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
+ for(int c=0; c<w; c++)
+ {
+ // FIXME row().coeff() not implemented yet
+// VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
+ }
+ }
}
for(int c=0; c<cols; c++)
@@ -91,8 +95,8 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
}
- */
+
// test assertion
VERIFY_RAISES_ASSERT( m.coeffRef(-1,1) = 0 );
VERIFY_RAISES_ASSERT( m.coeffRef(0,m.cols()) = 0 );
@@ -165,11 +169,11 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
// test innerVector()
{
- DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
- SparseMatrixType m2(rows, rows);
+ DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+ SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
- Index j0 = internal::random<Index>(0,rows-1);
- Index j1 = internal::random<Index>(0,rows-1);
+ Index j0 = internal::random<Index>(0,outer-1);
+ Index j1 = internal::random<Index>(0,outer-1);
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
else
@@ -180,42 +184,41 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
else
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
- SparseMatrixType m3(rows,rows);
- m3.reserve(VectorXi::Constant(rows,int(rows/2)));
- for(Index j=0; j<rows; ++j)
- for(Index k=0; k<j; ++k)
+ SparseMatrixType m3(rows,cols);
+ m3.reserve(VectorXi::Constant(outer,int(inner/2)));
+ for(Index j=0; j<outer; ++j)
+ for(Index k=0; k<(std::min)(j,inner); ++k)
m3.insertByOuterInner(j,k) = k+1;
- for(Index j=0; j<rows; ++j)
+ for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
m3.makeCompressed();
- for(Index j=0; j<rows; ++j)
+ for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
-
+
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
- //m2.innerVector(j0) = 2*m2.innerVector(j1);
- //refMat2.col(j0) = 2*refMat2.col(j1);
- //VERIFY_IS_APPROX(m2, refMat2);
+// m2.innerVector(j0) = 2*m2.innerVector(j1);
+// refMat2.col(j0) = 2*refMat2.col(j1);
+// VERIFY_IS_APPROX(m2, refMat2);
}
// test innerVectors()
{
- DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
- SparseMatrixType m2(rows, rows);
+ DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+ SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
-
- Index j0 = internal::random<Index>(0,rows-2);
- Index j1 = internal::random<Index>(0,rows-2);
- Index n0 = internal::random<Index>(1,rows-(std::max)(j0,j1));
+ Index j0 = internal::random<Index>(0,outer-2);
+ Index j1 = internal::random<Index>(0,outer-2);
+ Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
else
@@ -239,22 +242,23 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m2, refMat2);
}
-
+
// test basic computations
{
- DenseMatrix refM1 = DenseMatrix::Zero(rows, rows);
- DenseMatrix refM2 = DenseMatrix::Zero(rows, rows);
- DenseMatrix refM3 = DenseMatrix::Zero(rows, rows);
- DenseMatrix refM4 = DenseMatrix::Zero(rows, rows);
- SparseMatrixType m1(rows, rows);
- SparseMatrixType m2(rows, rows);
- SparseMatrixType m3(rows, rows);
- SparseMatrixType m4(rows, rows);
+ DenseMatrix refM1 = DenseMatrix::Zero(rows, cols);
+ DenseMatrix refM2 = DenseMatrix::Zero(rows, cols);
+ DenseMatrix refM3 = DenseMatrix::Zero(rows, cols);
+ DenseMatrix refM4 = DenseMatrix::Zero(rows, cols);
+ SparseMatrixType m1(rows, cols);
+ SparseMatrixType m2(rows, cols);
+ SparseMatrixType m3(rows, cols);
+ SparseMatrixType m4(rows, cols);
initSparse<Scalar>(density, refM1, m1);
initSparse<Scalar>(density, refM2, m2);
initSparse<Scalar>(density, refM3, m3);
initSparse<Scalar>(density, refM4, m4);
+ VERIFY_IS_APPROX(m1*s1, refM1*s1);
VERIFY_IS_APPROX(m1+m2, refM1+refM2);
VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3);
VERIFY_IS_APPROX(m3.cwiseProduct(m1+m2), refM3.cwiseProduct(refM1+refM2));
@@ -269,7 +273,7 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.row(0).dot(refM2.row(0)));
else
- VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.col(0).dot(refM2.row(0)));
+ VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.col(0)), refM1.col(0).dot(refM2.col(0)));
DenseVector rv = DenseVector::Random(m1.cols());
DenseVector cv = DenseVector::Random(m1.rows());
@@ -296,25 +300,29 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
// test transpose
{
- DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
- SparseMatrixType m2(rows, rows);
+ DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+ SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
VERIFY_IS_APPROX(m2.transpose().eval(), refMat2.transpose().eval());
VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose());
VERIFY_IS_APPROX(SparseMatrixType(m2.adjoint()), refMat2.adjoint());
+
+ // check isApprox handles opposite storage order
+ typename Transpose<SparseMatrixType>::PlainObject m3(m2);
+ VERIFY(m2.isApprox(m3));
}
// test generic blocks
{
- DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
- SparseMatrixType m2(rows, rows);
+ DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+ SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
- Index j0 = internal::random<Index>(0,rows-2);
- Index j1 = internal::random<Index>(0,rows-2);
- Index n0 = internal::random<Index>(1,rows-(std::max)(j0,j1));
+ Index j0 = internal::random<Index>(0,outer-2);
+ Index j1 = internal::random<Index>(0,outer-2);
+ Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
else
@@ -341,8 +349,8 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
// test prune
{
- SparseMatrixType m2(rows, rows);
- DenseMatrix refM2(rows, rows);
+ SparseMatrixType m2(rows, cols);
+ DenseMatrix refM2(rows, cols);
refM2.setZero();
int countFalseNonZero = 0;
int countTrueNonZero = 0;
@@ -403,8 +411,8 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
// test triangularView
{
- DenseMatrix refMat2(rows, rows), refMat3(rows, rows);
- SparseMatrixType m2(rows, rows), m3(rows, rows);
+ DenseMatrix refMat2(rows, cols), refMat3(rows, cols);
+ SparseMatrixType m2(rows, cols), m3(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
refMat3 = refMat2.template triangularView<Lower>();
m3 = m2.template triangularView<Lower>();
@@ -414,13 +422,16 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
m3 = m2.template triangularView<Upper>();
VERIFY_IS_APPROX(m3, refMat3);
- refMat3 = refMat2.template triangularView<UnitUpper>();
- m3 = m2.template triangularView<UnitUpper>();
- VERIFY_IS_APPROX(m3, refMat3);
+ if(inner>=outer) // FIXME this should be implemented for outer>inner as well
+ {
+ refMat3 = refMat2.template triangularView<UnitUpper>();
+ m3 = m2.template triangularView<UnitUpper>();
+ VERIFY_IS_APPROX(m3, refMat3);
- refMat3 = refMat2.template triangularView<UnitLower>();
- m3 = m2.template triangularView<UnitLower>();
- VERIFY_IS_APPROX(m3, refMat3);
+ refMat3 = refMat2.template triangularView<UnitLower>();
+ m3 = m2.template triangularView<UnitLower>();
+ VERIFY_IS_APPROX(m3, refMat3);
+ }
refMat3 = refMat2.template triangularView<StrictlyUpper>();
m3 = m2.template triangularView<StrictlyUpper>();
@@ -440,6 +451,11 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
refMat3 = refMat2.template selfadjointView<Lower>();
m3 = m2.template selfadjointView<Lower>();
VERIFY_IS_APPROX(m3, refMat3);
+
+ // selfadjointView only works for square matrices:
+ SparseMatrixType m4(rows, rows+1);
+ VERIFY_RAISES_ASSERT(m4.template selfadjointView<Lower>());
+ VERIFY_RAISES_ASSERT(m4.template selfadjointView<Upper>());
}
// test sparseView
@@ -452,16 +468,23 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
// test diagonal
{
- DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
- SparseMatrixType m2(rows, rows);
+ DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+ SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
VERIFY_IS_APPROX(m2.diagonal(), refMat2.diagonal().eval());
+ VERIFY_IS_APPROX(const_cast<const SparseMatrixType&>(m2).diagonal(), refMat2.diagonal().eval());
+
+ initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag);
+ m2.diagonal() += refMat2.diagonal();
+ refMat2.diagonal() += refMat2.diagonal();
+ VERIFY_IS_APPROX(m2, refMat2);
}
// test conservative resize
{
std::vector< std::pair<Index,Index> > inc;
- inc.push_back(std::pair<Index,Index>(-3,-2));
+ if(rows > 3 && cols > 2)
+ inc.push_back(std::pair<Index,Index>(-3,-2));
inc.push_back(std::pair<Index,Index>(0,0));
inc.push_back(std::pair<Index,Index>(3,2));
inc.push_back(std::pair<Index,Index>(3,0));
@@ -502,19 +525,54 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
}
}
+
+template<typename SparseMatrixType>
+void big_sparse_triplet(typename SparseMatrixType::Index rows, typename SparseMatrixType::Index cols, double density) {
+  typedef typename SparseMatrixType::Index Index;
+  typedef typename SparseMatrixType::Scalar Scalar;
+  typedef Triplet<Scalar,Index> TripletType;
+  std::vector<TripletType> triplets;
+  double nelements = density * rows*cols;
+  VERIFY(nelements>=0 && nelements < NumTraits<Index>::highest());
+  Index ntriplets = Index(nelements);
+  triplets.reserve(ntriplets);
+  Scalar sum = Scalar(0);
+  for(Index i=0;i<ntriplets;++i)
+  {
+    Index r = internal::random<Index>(0,rows-1);
+    Index c = internal::random<Index>(0,cols-1);
+    Scalar v = internal::random<Scalar>();
+    triplets.push_back(TripletType(r,c,v));
+    sum += v;
+  }
+  SparseMatrixType m(rows,cols);
+  m.setFromTriplets(triplets.begin(), triplets.end());
+  VERIFY(m.nonZeros() <= ntriplets);
+  VERIFY_IS_APPROX(sum, m.sum());
+}
+
+
void test_sparse_basic()
{
for(int i = 0; i < g_repeat; i++) {
- int s = Eigen::internal::random<int>(1,50);
- EIGEN_UNUSED_VARIABLE(s);
+ int r = Eigen::internal::random<int>(1,100), c = Eigen::internal::random<int>(1,100);
+ if(Eigen::internal::random<int>(0,4) == 0) {
+ r = c; // check square matrices in 25% of tries
+ }
+ EIGEN_UNUSED_VARIABLE(r+c);
+ CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(1, 1)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(8, 8)) ));
- CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(s, s)) ));
- CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(s, s)) ));
- CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(s, s)) ));
- CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,long int>(s, s)) ));
- CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,long int>(s, s)) ));
+ CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
+ CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
+ CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(r, c)) ));
+ CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
+ CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
- CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(s), short(s))) ));
- CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(s), short(s))) ));
+ CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
+ CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
}
+
+ // Regression test for bug 900 (manually insert higher values here if you have enough RAM):
+ CALL_SUBTEST_3((big_sparse_triplet<SparseMatrix<float, RowMajor, int> >(10000, 10000, 0.125)));
+ CALL_SUBTEST_4((big_sparse_triplet<SparseMatrix<double, ColMajor, long int> >(10000, 10000, 0.125)));
}
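For context, the triplet-filling pattern that big_sparse_triplet stresses above is the standard way to build a SparseMatrix; a minimal standalone sketch (values invented for illustration) showing why the test only asserts m.nonZeros() <= ntriplets while the total sum is preserved: setFromTriplets sums duplicate entries.

#include <Eigen/Sparse>
#include <vector>
#include <iostream>

int main()
{
  typedef Eigen::Triplet<double> T;
  std::vector<T> triplets;
  triplets.push_back(T(0,0,1.0)); // two entries at (0,0) ...
  triplets.push_back(T(0,0,2.0)); // ... are summed on insertion
  triplets.push_back(T(1,2,3.0));

  Eigen::SparseMatrix<double> m(3,3);
  m.setFromTriplets(triplets.begin(), triplets.end());

  std::cout << m.nonZeros() << "\n"; // 2: duplicates were merged
  std::cout << m.coeff(0,0) << "\n"; // 3: 1.0 + 2.0
  return 0;
}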
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 0f52164c8..366e27274 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -19,7 +19,7 @@ template<typename SparseMatrixType> void sparse_product()
typedef typename SparseMatrixType::Scalar Scalar;
enum { Flags = SparseMatrixType::Flags };
- double density = (std::max)(8./(rows*cols), 0.1);
+ double density = (std::max)(8./(rows*cols), 0.2);
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
typedef Matrix<Scalar,1,Dynamic> RowDenseVector;
@@ -77,17 +77,27 @@ template<typename SparseMatrixType> void sparse_product()
m4 = m2; refMat4 = refMat2;
VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3);
- // sparse * dense
+ // sparse * dense matrix
VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3);
VERIFY_IS_APPROX(dm4=m2*refMat3t.transpose(), refMat4=refMat2*refMat3t.transpose());
VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3, refMat4=refMat2t.transpose()*refMat3);
VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());
+ VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3);
+ VERIFY_IS_APPROX(dm4=dm4+m2*refMat3, refMat4=refMat4+refMat2*refMat3);
VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3));
VERIFY_IS_APPROX(dm4=m2t.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2t.transpose()*(refMat3+refMat5)*0.5);
+
+ // sparse * dense vector
+ VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3.col(0), refMat4.col(0)=refMat2*refMat3.col(0));
+ VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3t.transpose().col(0), refMat4.col(0)=refMat2*refMat3t.transpose().col(0));
+ VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3.col(0), refMat4.col(0)=refMat2t.transpose()*refMat3.col(0));
+ VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3t.transpose().col(0), refMat4.col(0)=refMat2t.transpose()*refMat3t.transpose().col(0));
// dense * sparse
VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3);
+ VERIFY_IS_APPROX(dm4=dm4+refMat2*m3, refMat4=refMat4+refMat2*refMat3);
+ VERIFY_IS_APPROX(dm4+=refMat2*m3, refMat4+=refMat2*refMat3);
VERIFY_IS_APPROX(dm4=refMat2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose());
VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3);
VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());
@@ -99,7 +109,7 @@ template<typename SparseMatrixType> void sparse_product()
Index c1 = internal::random<Index>(0,cols-1);
Index r1 = internal::random<Index>(0,depth-1);
DenseMatrix dm5 = DenseMatrix::Random(depth, cols);
-
+
VERIFY_IS_APPROX( m4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose());
VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count());
VERIFY_IS_APPROX( m4=m2.middleCols(c,1)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose());
@@ -143,11 +153,11 @@ template<typename SparseMatrixType> void sparse_product()
RowSpVector rv0(depth), rv1;
RowDenseVector drv0(depth), drv1(rv1);
initSparse(2*density,drv0, rv0);
-
- VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3);
+
+ VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0);
VERIFY_IS_APPROX(rv1=rv0*m3, drv1=drv0*refMat3);
- VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0);
VERIFY_IS_APPROX(cv1=m3t.adjoint()*cv0, dcv1=refMat3t.adjoint()*dcv0);
+ VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3);
VERIFY_IS_APPROX(rv1=m3*cv0, drv1=refMat3*dcv0);
}
@@ -184,7 +194,7 @@ template<typename SparseMatrixType> void sparse_product()
VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose());
}
- // test self adjoint products
+ // test self-adjoint and triangular-view products
{
DenseMatrix b = DenseMatrix::Random(rows, rows);
DenseMatrix x = DenseMatrix::Random(rows, rows);
@@ -192,9 +202,12 @@ template<typename SparseMatrixType> void sparse_product()
DenseMatrix refUp = DenseMatrix::Zero(rows, rows);
DenseMatrix refLo = DenseMatrix::Zero(rows, rows);
DenseMatrix refS = DenseMatrix::Zero(rows, rows);
+ DenseMatrix refA = DenseMatrix::Zero(rows, rows);
SparseMatrixType mUp(rows, rows);
SparseMatrixType mLo(rows, rows);
SparseMatrixType mS(rows, rows);
+ SparseMatrixType mA(rows, rows);
+ initSparse<Scalar>(density, refA, mA);
do {
initSparse<Scalar>(density, refUp, mUp, ForceRealDiag|/*ForceNonZeroDiag|*/MakeUpperTriangular);
} while (refUp.isZero());
@@ -214,19 +227,30 @@ template<typename SparseMatrixType> void sparse_product()
VERIFY_IS_APPROX(mS, refS);
VERIFY_IS_APPROX(x=mS*b, refX=refS*b);
+ // sparse selfadjointView with dense matrices
VERIFY_IS_APPROX(x=mUp.template selfadjointView<Upper>()*b, refX=refS*b);
VERIFY_IS_APPROX(x=mLo.template selfadjointView<Lower>()*b, refX=refS*b);
VERIFY_IS_APPROX(x=mS.template selfadjointView<Upper|Lower>()*b, refX=refS*b);
- // sparse selfadjointView * sparse
+ // sparse selfadjointView with sparse matrices
SparseMatrixType mSres(rows,rows);
VERIFY_IS_APPROX(mSres = mLo.template selfadjointView<Lower>()*mS,
refX = refLo.template selfadjointView<Lower>()*refS);
- // sparse * sparse selfadjointview
VERIFY_IS_APPROX(mSres = mS * mLo.template selfadjointView<Lower>(),
refX = refS * refLo.template selfadjointView<Lower>());
+
+ // sparse triangularView with dense matrices
+ VERIFY_IS_APPROX(x=mA.template triangularView<Upper>()*b, refX=refA.template triangularView<Upper>()*b);
+ VERIFY_IS_APPROX(x=mA.template triangularView<Lower>()*b, refX=refA.template triangularView<Lower>()*b);
+ VERIFY_IS_APPROX(x=b*mA.template triangularView<Upper>(), refX=b*refA.template triangularView<Upper>());
+ VERIFY_IS_APPROX(x=b*mA.template triangularView<Lower>(), refX=b*refA.template triangularView<Lower>());
+
+ // sparse triangularView with sparse matrices
+ VERIFY_IS_APPROX(mSres = mA.template triangularView<Lower>()*mS, refX = refA.template triangularView<Lower>()*refS);
+ VERIFY_IS_APPROX(mSres = mS * mA.template triangularView<Lower>(), refX = refS * refA.template triangularView<Lower>());
+ VERIFY_IS_APPROX(mSres = mA.template triangularView<Upper>()*mS, refX = refA.template triangularView<Upper>()*refS);
+ VERIFY_IS_APPROX(mSres = mS * mA.template triangularView<Upper>(), refX = refS * refA.template triangularView<Upper>());
}
-
}
// New test for Bug in SparseTimeDenseProduct
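As background for the self-adjoint and triangular-view products tested above, the user-facing API reads roughly as follows; a minimal sketch, not part of the patch, with invented matrix values.

#include <Eigen/Sparse>
#include <Eigen/Dense>

int main()
{
  // store only the lower triangle of a symmetric matrix
  Eigen::SparseMatrix<double> A(4,4);
  A.insert(0,0) = 2; A.insert(1,0) = 1;
  A.insert(2,2) = 3; A.insert(3,1) = 4;
  A.makeCompressed();

  Eigen::MatrixXd b = Eigen::MatrixXd::Random(4,2);

  // symmetric (self-adjoint) view times a dense matrix
  Eigen::MatrixXd x1 = A.selfadjointView<Eigen::Lower>() * b;
  // triangular view times a dense matrix
  Eigen::MatrixXd x2 = A.triangularView<Eigen::Lower>() * b;
  return 0;
}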
diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index d84aff070..ee350d561 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h
@@ -15,6 +15,7 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
+ typedef typename Mat::Index Index;
DenseRhs refX = dA.lu().solve(db);
{
@@ -35,8 +36,8 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
return;
}
VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!");
-
VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+
x.setZero();
// test the analyze/factorize API
solver.analyzePattern(A);
@@ -54,8 +55,31 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
return;
}
VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!");
-
VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+
+
+ x.setZero();
+ // test with Map
+ MappedSparseMatrix<Scalar,Mat::Options,Index> Am(A.rows(), A.cols(), A.nonZeros(), const_cast<Index*>(A.outerIndexPtr()), const_cast<Index*>(A.innerIndexPtr()), const_cast<Scalar*>(A.valuePtr()));
+ solver.compute(Am);
+ if (solver.info() != Success)
+ {
+ std::cerr << "sparse solver testing: factorization failed (check_sparse_solving)\n";
+ return;
+ }
+ DenseRhs dx(refX);
+ dx.setZero();
+ Map<DenseRhs> xm(dx.data(), dx.rows(), dx.cols());
+ Map<const DenseRhs> bm(db.data(), db.rows(), db.cols());
+ xm = solver.solve(bm);
+ if (solver.info() != Success)
+ {
+ std::cerr << "sparse solver testing: solving failed\n";
+ return;
+ }
+ VERIFY(oldb.isApprox(bm) && "sparse solver testing: the rhs should not be modified!");
+ VERIFY(xm.isApprox(refX,test_precision<Scalar>()));
}
// test dense Block as the result and rhs:
@@ -67,6 +91,15 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!");
VERIFY(x.isApprox(refX,test_precision<Scalar>()));
}
+
+ // test uncompressed inputs
+ {
+ Mat A2 = A;
+ A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast<typename Mat::Index>().eval());
+ solver.compute(A2);
+ Rhs x = solver.solve(b);
+ VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+ }
}
template<typename Solver, typename Rhs>
@@ -124,7 +157,23 @@ void check_sparse_determinant(Solver& solver, const typename Solver::MatrixType&
Scalar refDet = dA.determinant();
VERIFY_IS_APPROX(refDet,solver.determinant());
}
+template<typename Solver, typename DenseMat>
+void check_sparse_abs_determinant(Solver& solver, const typename Solver::MatrixType& A, const DenseMat& dA)
+{
+ using std::abs;
+ typedef typename Solver::MatrixType Mat;
+ typedef typename Mat::Scalar Scalar;
+
+ solver.compute(A);
+ if (solver.info() != Success)
+ {
+ std::cerr << "sparse solver testing: factorization failed (check_sparse_abs_determinant)\n";
+ return;
+ }
+ Scalar refDet = abs(dA.determinant());
+ VERIFY_IS_APPROX(refDet,solver.absDeterminant());
+}
template<typename Solver, typename DenseMat>
int generate_sparse_spd_problem(Solver& , typename Solver::MatrixType& A, typename Solver::MatrixType& halfA, DenseMat& dA, int maxSize = 300)
@@ -324,3 +373,20 @@ template<typename Solver> void check_sparse_square_determinant(Solver& solver)
check_sparse_determinant(solver, A, dA);
}
}
+
+template<typename Solver> void check_sparse_square_abs_determinant(Solver& solver)
+{
+ typedef typename Solver::MatrixType Mat;
+ typedef typename Mat::Scalar Scalar;
+ typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
+
+ // generate the problem
+ Mat A;
+ DenseMatrix dA;
+ generate_sparse_square_problem(solver, A, dA, 30);
+ A.makeCompressed();
+ for (int i = 0; i < g_repeat; i++) {
+ check_sparse_abs_determinant(solver, A, dA);
+ }
+}
+
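For reference, the one-shot compute/solve and the two-step analyzePattern/factorize APIs exercised by check_sparse_solving look like this in user code; a sketch with a tiny made-up system.

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> A(2,2);
  A.insert(0,0) = 4; A.insert(0,1) = 1;
  A.insert(1,0) = 1; A.insert(1,1) = 3;
  A.makeCompressed();
  Eigen::VectorXd b(2); b << 1, 2;

  Eigen::SparseLU<Eigen::SparseMatrix<double> > solver;
  // one-shot API
  solver.compute(A);
  if(solver.info() != Eigen::Success) return 1;
  Eigen::VectorXd x = solver.solve(b);

  // two-step API: the symbolic analysis can be reused for
  // other matrices sharing the same sparsity pattern
  solver.analyzePattern(A);
  solver.factorize(A);
  x = solver.solve(b);

  std::cout << x.transpose() << "\n";
  return 0;
}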
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index 0c9476803..5dc421976 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -23,8 +23,8 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
SparseVectorType v1(rows), v2(rows), v3(rows);
DenseMatrix refM1 = DenseMatrix::Zero(rows, rows);
DenseVector refV1 = DenseVector::Random(rows),
- refV2 = DenseVector::Random(rows),
- refV3 = DenseVector::Random(rows);
+ refV2 = DenseVector::Random(rows),
+ refV3 = DenseVector::Random(rows);
std::vector<int> zerocoords, nonzerocoords;
initSparse<Scalar>(densityVec, refV1, v1, &zerocoords, &nonzerocoords);
@@ -52,6 +52,20 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
}
}
VERIFY_IS_APPROX(v1, refV1);
+
+ // test coeffRef with reallocation
+ {
+ SparseVectorType v1(rows);
+ DenseVector v2 = DenseVector::Zero(rows);
+ for(int k=0; k<rows; ++k)
+ {
+ int i = internal::random<int>(0,rows-1);
+ Scalar v = internal::random<Scalar>();
+ v1.coeffRef(i) += v;
+ v2.coeffRef(i) += v;
+ }
+ VERIFY_IS_APPROX(v1,v2);
+ }
v1.coeffRef(nonzerocoords[0]) = Scalar(5);
refV1.coeffRef(nonzerocoords[0]) = Scalar(5);
@@ -71,6 +85,7 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
VERIFY_IS_APPROX(v1.dot(v2), refV1.dot(refV2));
VERIFY_IS_APPROX(v1.dot(refV2), refV1.dot(refV2));
+ VERIFY_IS_APPROX(m1*v2, refM1*refV2);
VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2));
int i = internal::random<int>(0,rows-1);
VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i)));
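The coeffRef-with-reallocation loop added above relies on coeffRef inserting missing coefficients on the fly; in isolation (sketch):

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseVector<double> v(10);
  // coeffRef inserts the coefficient if it is not stored yet,
  // possibly reallocating the internal buffers
  v.coeffRef(3) += 1.5;
  v.coeffRef(3) += 0.5; // entry already exists, no insertion
  v.coeffRef(7)  = 2.0;
  std::cout << v.nonZeros() << "\n"; // 2
  return 0;
}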
diff --git a/test/sparselu.cpp b/test/sparselu.cpp
index 37980defc..52371cb12 100644
--- a/test/sparselu.cpp
+++ b/test/sparselu.cpp
@@ -44,6 +44,9 @@ template<typename T> void test_sparselu_T()
check_sparse_square_solving(sparselu_colamd);
check_sparse_square_solving(sparselu_amd);
check_sparse_square_solving(sparselu_natural);
+
+ check_sparse_square_abs_determinant(sparselu_colamd);
+ check_sparse_square_abs_determinant(sparselu_amd);
}
void test_sparselu()
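The abs-determinant checks wired in above call SparseLU's absDeterminant(); standalone, that accessor is used like this (sketch, values invented):

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> A(2,2);
  A.insert(0,0) = 3; A.insert(1,1) = -2; // det(A) = -6
  A.makeCompressed();

  Eigen::SparseLU<Eigen::SparseMatrix<double> > lu(A);
  if(lu.info() != Eigen::Success) return 1;
  std::cout << lu.absDeterminant() << "\n"; // 6
  return 0;
}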
diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp
index f76919af4..650f62a8a 100644
--- a/test/stable_norm.cpp
+++ b/test/stable_norm.cpp
@@ -9,26 +9,6 @@
#include "main.h"
-template<typename T> bool isNotNaN(const T& x)
-{
- return x==x;
-}
-
-template<typename T> bool isNaN(const T& x)
-{
- return x!=x;
-}
-
-template<typename T> bool isInf(const T& x)
-{
- return x > NumTraits<T>::highest();
-}
-
-template<typename T> bool isMinusInf(const T& x)
-{
- return x < NumTraits<T>::lowest();
-}
-
// workaround aggressive optimization in ICC
template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
@@ -130,7 +110,7 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
// NaN
{
v = vrand;
- v(i,j) = RealScalar(0)/RealScalar(0); // 0/0 relied on compile-time evaluation of NaN
+ v(i,j) = std::numeric_limits<RealScalar>::quiet_NaN();
VERIFY(!isFinite(v.squaredNorm())); VERIFY(isNaN(v.squaredNorm()));
VERIFY(!isFinite(v.norm())); VERIFY(isNaN(v.norm()));
VERIFY(!isFinite(v.stableNorm())); VERIFY(isNaN(v.stableNorm()));
@@ -141,7 +121,7 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
// +inf
{
v = vrand;
- v(i,j) = RealScalar(1)/RealScalar(0);
+ v(i,j) = std::numeric_limits<RealScalar>::infinity();
VERIFY(!isFinite(v.squaredNorm())); VERIFY(isInf(v.squaredNorm()));
VERIFY(!isFinite(v.norm())); VERIFY(isInf(v.norm()));
VERIFY(!isFinite(v.stableNorm())); VERIFY(isInf(v.stableNorm()));
@@ -152,7 +132,7 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
// -inf
{
v = vrand;
- v(i,j) = RealScalar(-1)/RealScalar(0);
+ v(i,j) = -std::numeric_limits<RealScalar>::infinity();
VERIFY(!isFinite(v.squaredNorm())); VERIFY(isInf(v.squaredNorm()));
VERIFY(!isFinite(v.norm())); VERIFY(isInf(v.norm()));
VERIFY(!isFinite(v.stableNorm())); VERIFY(isInf(v.stableNorm()));
@@ -165,8 +145,8 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
Index i2 = internal::random<Index>(0,rows-1);
Index j2 = internal::random<Index>(0,cols-1);
v = vrand;
- v(i,j) = RealScalar(-1)/RealScalar(0);
- v(i2,j2) = RealScalar(0)/RealScalar(0);
+ v(i,j) = -std::numeric_limits<RealScalar>::infinity();
+ v(i2,j2) = std::numeric_limits<RealScalar>::quiet_NaN();
VERIFY(!isFinite(v.squaredNorm())); VERIFY(isNaN(v.squaredNorm()));
VERIFY(!isFinite(v.norm())); VERIFY(isNaN(v.norm()));
VERIFY(!isFinite(v.stableNorm())); VERIFY(isNaN(v.stableNorm()));
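The special-value behavior asserted above can be reproduced directly with std::numeric_limits, which is exactly what the patch switches to; a sketch:

#include <Eigen/Dense>
#include <limits>
#include <iostream>

int main()
{
  Eigen::VectorXd v = Eigen::VectorXd::Random(5);
  v(2) = std::numeric_limits<double>::quiet_NaN();
  // every norm flavor must propagate the NaN rather than hide it
  std::cout << v.norm() << " " << v.stableNorm() << " "
            << v.blueNorm() << " " << v.hypotNorm() << "\n"; // all print nan
  return 0;
}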
diff --git a/test/svd_common.h b/test/svd_common.h
new file mode 100644
index 000000000..4c172cf9d
--- /dev/null
+++ b/test/svd_common.h
@@ -0,0 +1,493 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef SVD_DEFAULT
+#error a macro SVD_DEFAULT(MatrixType) must be defined prior to including svd_common.h
+#endif
+
+#ifndef SVD_FOR_MIN_NORM
+#error a macro SVD_FOR_MIN_NORM(MatrixType) must be defined prior to including svd_common.h
+#endif
+
+// Check that the matrix m is properly reconstructed and that the U and V factors are unitary
+// The SVD must have already been computed.
+template<typename SvdType, typename MatrixType>
+void svd_check_full(const MatrixType& m, const SvdType& svd)
+{
+ typedef typename MatrixType::Index Index;
+ Index rows = m.rows();
+ Index cols = m.cols();
+
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime
+ };
+
+ typedef typename MatrixType::Scalar Scalar;
+ typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
+ typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
+
+ MatrixType sigma = MatrixType::Zero(rows,cols);
+ sigma.diagonal() = svd.singularValues().template cast<Scalar>();
+ MatrixUType u = svd.matrixU();
+ MatrixVType v = svd.matrixV();
+ VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
+ VERIFY_IS_UNITARY(u);
+ VERIFY_IS_UNITARY(v);
+}
+
+// Compare partial SVD defined by computationOptions to a full SVD referenceSvd
+template<typename SvdType, typename MatrixType>
+void svd_compare_to_full(const MatrixType& m,
+ unsigned int computationOptions,
+ const SvdType& referenceSvd)
+{
+ typedef typename MatrixType::Index Index;
+ Index rows = m.rows();
+ Index cols = m.cols();
+ Index diagSize = (std::min)(rows, cols);
+
+ SvdType svd(m, computationOptions);
+
+ VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
+ if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
+ if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
+ if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
+ if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
+}
+
+// Check least-squares solving of m*x = rhs via SVD for the given computation options
+template<typename SvdType, typename MatrixType>
+void svd_least_square(const MatrixType& m, unsigned int computationOptions)
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef typename MatrixType::Index Index;
+ Index rows = m.rows();
+ Index cols = m.cols();
+
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime
+ };
+
+ typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
+ typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
+
+ RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
+ SvdType svd(m, computationOptions);
+
+ if(internal::is_same<RealScalar,double>::value) svd.setThreshold(1e-8);
+ else if(internal::is_same<RealScalar,float>::value) svd.setThreshold(1e-4);
+
+ SolutionType x = svd.solve(rhs);
+
+ // evaluate normal equation which works also for least-squares solutions
+ if(internal::is_same<RealScalar,double>::value || svd.rank()==m.diagonal().size())
+ {
+ // This test is not stable with single precision.
+ // This is probably because squaring m significantly affects the precision.
+ VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs);
+ }
+
+ RealScalar residual = (m*x-rhs).norm();
+ // Check that there is no significantly better solution in the neighborhood of x
+ if(!test_isMuchSmallerThan(residual,rhs.norm()))
+ {
+ // ^^^ If the residual is very small, then we have an exact solution, so we are already good.
+ for(Index k=0;k<x.rows();++k)
+ {
+ SolutionType y(x);
+ y.row(k) = (1.+2*NumTraits<RealScalar>::epsilon())*x.row(k);
+ RealScalar residual_y = (m*y-rhs).norm();
+ VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
+
+ y.row(k) = (1.-2*NumTraits<RealScalar>::epsilon())*x.row(k);
+ residual_y = (m*y-rhs).norm();
+ VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
+ }
+ }
+}
+
+// check minimal-norm solutions; the input matrix m is only used to recover the problem size
+template<typename MatrixType>
+void svd_min_norm(const MatrixType& m, unsigned int computationOptions)
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+ Index cols = m.cols();
+
+ enum {
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime
+ };
+
+ typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
+
+ // generate a full-rank m x n problem with m<n
+ enum {
+ RankAtCompileTime2 = ColsAtCompileTime==Dynamic ? Dynamic : (ColsAtCompileTime)/2+1,
+ RowsAtCompileTime3 = ColsAtCompileTime==Dynamic ? Dynamic : ColsAtCompileTime+1
+ };
+ typedef Matrix<Scalar, RankAtCompileTime2, ColsAtCompileTime> MatrixType2;
+ typedef Matrix<Scalar, RankAtCompileTime2, 1> RhsType2;
+ typedef Matrix<Scalar, ColsAtCompileTime, RankAtCompileTime2> MatrixType2T;
+ Index rank = RankAtCompileTime2==Dynamic ? internal::random<Index>(1,cols) : Index(RankAtCompileTime2);
+ MatrixType2 m2(rank,cols);
+ int guard = 0;
+ do {
+ m2.setRandom();
+ } while(SVD_FOR_MIN_NORM(MatrixType2)(m2).setThreshold(test_precision<Scalar>()).rank()!=rank && (++guard)<10);
+ VERIFY(guard<10);
+
+ RhsType2 rhs2 = RhsType2::Random(rank);
+ // use QR to find a reference minimal norm solution
+ HouseholderQR<MatrixType2T> qr(m2.adjoint());
+ Matrix<Scalar,Dynamic,1> tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView<Upper>().adjoint().solve(rhs2);
+ tmp.conservativeResize(cols);
+ tmp.tail(cols-rank).setZero();
+ SolutionType x21 = qr.householderQ() * tmp;
+ // now check with SVD
+ SVD_FOR_MIN_NORM(MatrixType2) svd2(m2, computationOptions);
+ SolutionType x22 = svd2.solve(rhs2);
+ VERIFY_IS_APPROX(m2*x21, rhs2);
+ VERIFY_IS_APPROX(m2*x22, rhs2);
+ VERIFY_IS_APPROX(x21, x22);
+
+ // Now check with a rank deficient matrix
+ typedef Matrix<Scalar, RowsAtCompileTime3, ColsAtCompileTime> MatrixType3;
+ typedef Matrix<Scalar, RowsAtCompileTime3, 1> RhsType3;
+ Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random<Index>(rank+1,2*cols) : Index(RowsAtCompileTime3);
+ Matrix<Scalar,RowsAtCompileTime3,Dynamic> C = Matrix<Scalar,RowsAtCompileTime3,Dynamic>::Random(rows3,rank);
+ MatrixType3 m3 = C * m2;
+ RhsType3 rhs3 = C * rhs2;
+ SVD_FOR_MIN_NORM(MatrixType3) svd3(m3, computationOptions);
+ SolutionType x3 = svd3.solve(rhs3);
+ VERIFY_IS_APPROX(m3*x3, rhs3);
+ VERIFY_IS_APPROX(m3*x21, rhs3);
+ VERIFY_IS_APPROX(m2*x3, rhs2);
+ VERIFY_IS_APPROX(x21, x3);
+}
+
+// Check full, compare_to_full, least_square, and min_norm for all possible compute-options
+template<typename SvdType, typename MatrixType>
+void svd_test_all_computation_options(const MatrixType& m, bool full_only)
+{
+// if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
+// return;
+ SvdType fullSvd(m, ComputeFullU|ComputeFullV);
+ CALL_SUBTEST(( svd_check_full(m, fullSvd) ));
+ CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeFullU | ComputeFullV) ));
+ CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeFullV) ));
+
+ #if defined __INTEL_COMPILER
+ // remark #111: statement is unreachable
+ #pragma warning disable 111
+ #endif
+ if(full_only)
+ return;
+
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU, fullSvd) ));
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullV, fullSvd) ));
+ CALL_SUBTEST(( svd_compare_to_full(m, 0, fullSvd) ));
+
+ if (MatrixType::ColsAtCompileTime == Dynamic) {
+ // thin U/V are only available with dynamic number of columns
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) ));
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinV, fullSvd) ));
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) ));
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU , fullSvd) ));
+ CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) ));
+
+ CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeFullU | ComputeThinV) ));
+ CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeThinU | ComputeFullV) ));
+ CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeThinU | ComputeThinV) ));
+
+ CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeThinV) ));
+ CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeFullV) ));
+ CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeThinV) ));
+
+ // test reconstruction
+ typedef typename MatrixType::Index Index;
+ Index diagSize = (std::min)(m.rows(), m.cols());
+ SvdType svd(m, ComputeThinU | ComputeThinV);
+ VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
+ }
+}
+
+template<typename MatrixType>
+void svd_fill_random(MatrixType &m)
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef typename MatrixType::Index Index;
+ Index diagSize = (std::min)(m.rows(), m.cols());
+ RealScalar s = std::numeric_limits<RealScalar>::max_exponent10/4;
+ s = internal::random<RealScalar>(1,s);
+ Matrix<RealScalar,Dynamic,1> d = Matrix<RealScalar,Dynamic,1>::Random(diagSize);
+ for(Index k=0; k<diagSize; ++k)
+ d(k) = d(k)*std::pow(RealScalar(10),internal::random<RealScalar>(-s,s));
+
+ bool dup = internal::random<int>(0,10) < 3;
+ bool unit_uv = internal::random<int>(0,10) < (dup?7:3); // if we duplicate some diagonal entries, then increase the chance to preserve them using unitary U and V factors
+
+ // duplicate some singular values
+ if(dup)
+ {
+ Index n = internal::random<Index>(0,d.size()-1);
+ for(Index i=0; i<n; ++i)
+ d(internal::random<Index>(0,d.size()-1)) = d(internal::random<Index>(0,d.size()-1));
+ }
+
+ Matrix<Scalar,Dynamic,Dynamic> U(m.rows(),diagSize);
+ Matrix<Scalar,Dynamic,Dynamic> VT(diagSize,m.cols());
+ if(unit_uv)
+ {
+ // in very rare cases let's try with a pure diagonal matrix
+ if(internal::random<int>(0,10) < 1)
+ {
+ U.setIdentity();
+ VT.setIdentity();
+ }
+ else
+ {
+ createRandomPIMatrixOfRank(diagSize,U.rows(), U.cols(), U);
+ createRandomPIMatrixOfRank(diagSize,VT.rows(), VT.cols(), VT);
+ }
+ }
+ else
+ {
+ U.setRandom();
+ VT.setRandom();
+ }
+
+ m = U * d.asDiagonal() * VT;
+
+ // (partly) cancel some coeffs
+ if(!(dup && unit_uv))
+ {
+ Matrix<Scalar,Dynamic,1> samples(7);
+ samples << 0, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -1./NumTraits<RealScalar>::highest(), 1./NumTraits<RealScalar>::highest();
+ Index n = internal::random<Index>(0,m.size()-1);
+ for(Index i=0; i<n; ++i)
+ m(internal::random<Index>(0,m.rows()-1), internal::random<Index>(0,m.cols()-1)) = samples(internal::random<Index>(0,6));
+ }
+}
+
+
+// work around a stupid MSVC error when constructing at compile time an expression that
+// involves a division by zero, even though the numeric type is a floating-point type
+template<typename Scalar>
+EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
+
+// workaround aggressive optimization in ICC
+template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
+
+// all this function does is verify we don't iterate infinitely on nan/inf values
+template<typename SvdType, typename MatrixType>
+void svd_inf_nan()
+{
+ SvdType svd;
+ typedef typename MatrixType::Scalar Scalar;
+ Scalar some_inf = Scalar(1) / zero<Scalar>();
+ VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
+ svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+ VERIFY(nan != nan);
+ svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV);
+
+ MatrixType m = MatrixType::Zero(10,10);
+ m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
+ svd.compute(m, ComputeFullU | ComputeFullV);
+
+ m = MatrixType::Zero(10,10);
+ m(internal::random<int>(0,9), internal::random<int>(0,9)) = nan;
+ svd.compute(m, ComputeFullU | ComputeFullV);
+
+ // regression test for bug 791
+ m.resize(3,3);
+ m << 0, 2*NumTraits<Scalar>::epsilon(), 0.5,
+ 0, -0.5, 0,
+ nan, 0, 0;
+ svd.compute(m, ComputeFullU | ComputeFullV);
+
+ m.resize(4,4);
+ m << 1, 0, 0, 0,
+ 0, 3, 1, 2e-308,
+ 1, 0, 1, nan,
+ 0, nan, nan, 0;
+ svd.compute(m, ComputeFullU | ComputeFullV);
+}
+
+// Regression test for bug 286: JacobiSVD loops indefinitely with some
+// matrices containing denormal numbers.
+void svd_underoverflow()
+{
+#if defined __INTEL_COMPILER
+// shut up warning #239: floating point underflow
+#pragma warning push
+#pragma warning disable 239
+#endif
+ Matrix2d M;
+ M << -7.90884e-313, -4.94e-324,
+ 0, 5.60844e-313;
+ SVD_DEFAULT(Matrix2d) svd;
+ svd.compute(M,ComputeFullU|ComputeFullV);
+ CALL_SUBTEST( svd_check_full(M,svd) );
+
+ // Check all 2x2 matrices made with the following coefficients:
+ VectorXd value_set(9);
+ value_set << 0, 1, -1, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -4.94e-223, 4.94e-223;
+ Array4i id(0,0,0,0);
+ int k = 0;
+ do
+ {
+ M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3));
+ svd.compute(M,ComputeFullU|ComputeFullV);
+ CALL_SUBTEST( svd_check_full(M,svd) );
+
+ id(k)++;
+ if(id(k)>=value_set.size())
+ {
+ while(k<3 && id(k)>=value_set.size()) id(++k)++;
+ id.head(k).setZero();
+ k=0;
+ }
+
+ } while((id<int(value_set.size())).all());
+
+#if defined __INTEL_COMPILER
+#pragma warning pop
+#endif
+
+ // Check for overflow:
+ Matrix3d M3;
+ M3 << 4.4331978442502944e+307, -5.8585363752028680e+307, 6.4527017443412964e+307,
+ 3.7841695601406358e+307, 2.4331702789740617e+306, -3.5235707140272905e+307,
+ -8.7190887618028355e+307, -7.3453213709232193e+307, -2.4367363684472105e+307;
+
+ SVD_DEFAULT(Matrix3d) svd3;
+ svd3.compute(M3,ComputeFullU|ComputeFullV); // just check we don't loop indefinitely
+ CALL_SUBTEST( svd_check_full(M3,svd3) );
+}
+
+// void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
+
+template<typename MatrixType>
+void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) )
+{
+ MatrixType M;
+ VectorXd value_set(3);
+ value_set << 0, 1, -1;
+ Array4i id(0,0,0,0);
+ int k = 0;
+ do
+ {
+ M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3));
+
+ cb(M,false);
+
+ id(k)++;
+ if(id(k)>=value_set.size())
+ {
+ while(k<3 && id(k)>=value_set.size()) id(++k)++;
+ id.head(k).setZero();
+ k=0;
+ }
+
+ } while((id<int(value_set.size())).all());
+}
+
+void svd_preallocate()
+{
+ Vector3f v(3.f, 2.f, 1.f);
+ MatrixXf m = v.asDiagonal();
+
+ internal::set_is_malloc_allowed(false);
+ VERIFY_RAISES_ASSERT(VectorXf tmp(10);)
+ SVD_DEFAULT(MatrixXf) svd;
+ internal::set_is_malloc_allowed(true);
+ svd.compute(m);
+ VERIFY_IS_APPROX(svd.singularValues(), v);
+
+ SVD_DEFAULT(MatrixXf) svd2(3,3);
+ internal::set_is_malloc_allowed(false);
+ svd2.compute(m);
+ internal::set_is_malloc_allowed(true);
+ VERIFY_IS_APPROX(svd2.singularValues(), v);
+ VERIFY_RAISES_ASSERT(svd2.matrixU());
+ VERIFY_RAISES_ASSERT(svd2.matrixV());
+ svd2.compute(m, ComputeFullU | ComputeFullV);
+ VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
+ VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
+ internal::set_is_malloc_allowed(false);
+ svd2.compute(m);
+ internal::set_is_malloc_allowed(true);
+
+ SVD_DEFAULT(MatrixXf) svd3(3,3,ComputeFullU|ComputeFullV);
+ internal::set_is_malloc_allowed(false);
+ svd3.compute(m);
+ internal::set_is_malloc_allowed(true);
+ VERIFY_IS_APPROX(svd3.singularValues(), v);
+ VERIFY_IS_APPROX(svd3.matrixU(), Matrix3f::Identity());
+ VERIFY_IS_APPROX(svd3.matrixV(), Matrix3f::Identity());
+ internal::set_is_malloc_allowed(false);
+ svd3.compute(m, ComputeFullU|ComputeFullV);
+ internal::set_is_malloc_allowed(true);
+}
+
+template<typename SvdType,typename MatrixType>
+void svd_verify_assert(const MatrixType& m)
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+ Index rows = m.rows();
+ Index cols = m.cols();
+
+ enum {
+ RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime
+ };
+
+ typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
+ RhsType rhs(rows);
+ SvdType svd;
+ VERIFY_RAISES_ASSERT(svd.matrixU())
+ VERIFY_RAISES_ASSERT(svd.singularValues())
+ VERIFY_RAISES_ASSERT(svd.matrixV())
+ VERIFY_RAISES_ASSERT(svd.solve(rhs))
+ MatrixType a = MatrixType::Zero(rows, cols);
+ a.setZero();
+ svd.compute(a, 0);
+ VERIFY_RAISES_ASSERT(svd.matrixU())
+ VERIFY_RAISES_ASSERT(svd.matrixV())
+ svd.singularValues();
+ VERIFY_RAISES_ASSERT(svd.solve(rhs))
+
+ if (ColsAtCompileTime == Dynamic)
+ {
+ svd.compute(a, ComputeThinU);
+ svd.matrixU();
+ VERIFY_RAISES_ASSERT(svd.matrixV())
+ VERIFY_RAISES_ASSERT(svd.solve(rhs))
+ svd.compute(a, ComputeThinV);
+ svd.matrixV();
+ VERIFY_RAISES_ASSERT(svd.matrixU())
+ VERIFY_RAISES_ASSERT(svd.solve(rhs))
+ }
+ else
+ {
+ VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
+ VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
+ }
+}
+
+#undef SVD_DEFAULT
+#undef SVD_FOR_MIN_NORM
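The helpers above are instantiated by defining SVD_DEFAULT/SVD_FOR_MIN_NORM before inclusion, e.g. with JacobiSVD; for orientation, plain usage of such a decomposition looks like this (sketch):

#include <Eigen/SVD>
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(5,3);
  Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);

  // reconstruct m from the thin factors
  Eigen::MatrixXd rec = svd.matrixU() * svd.singularValues().asDiagonal()
                      * svd.matrixV().adjoint();
  std::cout << (m - rec).norm() << "\n"; // ~0

  // least-squares solve of m*x = b
  Eigen::VectorXd b = Eigen::VectorXd::Random(5);
  Eigen::VectorXd x = svd.solve(b);
  return 0;
}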
diff --git a/test/swap.cpp b/test/swap.cpp
index 36b353148..dc3610085 100644
--- a/test/swap.cpp
+++ b/test/swap.cpp
@@ -41,9 +41,15 @@ template<typename MatrixType> void swap(const MatrixType& m)
OtherMatrixType m3_copy = m3;
// test swapping 2 matrices of same type
+ Scalar *d1=m1.data(), *d2=m2.data();
m1.swap(m2);
VERIFY_IS_APPROX(m1,m2_copy);
VERIFY_IS_APPROX(m2,m1_copy);
+ if(MatrixType::SizeAtCompileTime==Dynamic)
+ {
+ VERIFY(m1.data()==d2);
+ VERIFY(m2.data()==d1);
+ }
m1 = m1_copy;
m2 = m2_copy;
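The new data-pointer checks capture the expectation that swapping two dynamically sized matrices only exchanges internal buffers; standalone (sketch):

#include <Eigen/Dense>
#include <cassert>

int main()
{
  Eigen::MatrixXd m1 = Eigen::MatrixXd::Random(3,3);
  Eigen::MatrixXd m2 = Eigen::MatrixXd::Random(3,3);
  const double *d1 = m1.data(), *d2 = m2.data();
  m1.swap(m2); // O(1): only the internal pointers move
  assert(m1.data() == d2 && m2.data() == d1);
  return 0;
}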
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index b069f0771..2f839cf51 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -27,19 +27,37 @@ std::string demangle_unrolling(int t)
if(t==CompleteUnrolling) return "CompleteUnrolling";
return "?";
}
+std::string demangle_flags(int f)
+{
+ std::string res;
+ if(f&RowMajorBit) res += " | RowMajor";
+ if(f&PacketAccessBit) res += " | Packet";
+ if(f&LinearAccessBit) res += " | Linear";
+ if(f&LvalueBit) res += " | Lvalue";
+ if(f&DirectAccessBit) res += " | Direct";
+ if(f&AlignedBit) res += " | Aligned";
+ if(f&NestByRefBit) res += " | NestByRef";
+ if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
+
+ return res;
+}
template<typename Dst, typename Src>
bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
{
- internal::assign_traits<Dst,Src>::debug();
- bool res = internal::assign_traits<Dst,Src>::Traversal==traversal
- && internal::assign_traits<Dst,Src>::Unrolling==unrolling;
+ typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
+ bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
if(!res)
{
+ std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl;
+ std::cerr << " " << demangle_flags(internal::evaluator<Src>::Flags) << std::endl;
+ std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl;
+ std::cerr << " " << demangle_flags(internal::evaluator<Dst>::Flags) << std::endl;
+ traits::debug();
std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
- << " got " << demangle_traversal(internal::assign_traits<Dst,Src>::Traversal) << "\n";
+ << " got " << demangle_traversal(traits::Traversal) << "\n";
std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
- << " got " << demangle_unrolling(internal::assign_traits<Dst,Src>::Unrolling) << "\n";
+ << " got " << demangle_unrolling(traits::Unrolling) << "\n";
}
return res;
}
@@ -47,15 +65,19 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
template<typename Dst, typename Src>
bool test_assign(int traversal, int unrolling)
{
- internal::assign_traits<Dst,Src>::debug();
- bool res = internal::assign_traits<Dst,Src>::Traversal==traversal
- && internal::assign_traits<Dst,Src>::Unrolling==unrolling;
+ typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
+ bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
if(!res)
{
+ std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl;
+ std::cerr << " " << demangle_flags(internal::evaluator<Src>::Flags) << std::endl;
+ std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl;
+ std::cerr << " " << demangle_flags(internal::evaluator<Dst>::Flags) << std::endl;
+ traits::debug();
std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
- << " got " << demangle_traversal(internal::assign_traits<Dst,Src>::Traversal) << "\n";
+ << " got " << demangle_traversal(traits::Traversal) << "\n";
std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
- << " got " << demangle_unrolling(internal::assign_traits<Dst,Src>::Unrolling) << "\n";
+ << " got " << demangle_unrolling(traits::Unrolling) << "\n";
}
return res;
}
@@ -63,10 +85,15 @@ bool test_assign(int traversal, int unrolling)
template<typename Xpr>
bool test_redux(const Xpr&, int traversal, int unrolling)
{
- typedef internal::redux_traits<internal::scalar_sum_op<typename Xpr::Scalar>,Xpr> traits;
+ typedef internal::redux_traits<internal::scalar_sum_op<typename Xpr::Scalar>,internal::redux_evaluator<Xpr> > traits;
+
bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
if(!res)
{
+ std::cerr << demangle_flags(Xpr::Flags) << std::endl;
+ std::cerr << demangle_flags(internal::evaluator<Xpr>::Flags) << std::endl;
+ traits::debug();
+
std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
<< " got " << demangle_traversal(traits::Traversal) << "\n";
std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp
index 6cd1acdda..1631d54c4 100644
--- a/test/vectorwiseop.cpp
+++ b/test/vectorwiseop.cpp
@@ -104,8 +104,8 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)
m2 = m1;
// yes, there might be an aliasing issue there but ".rowwise() /="
- // is suppposed to evaluate " m2.colwise().sum()" into to temporary to avoid
- // evaluating the reducions multiple times
+ // is supposed to evaluate " m2.colwise().sum()" into a temporary to avoid
+ // evaluating the reduction multiple times
if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic)
{
m2.rowwise() /= m2.colwise().sum();
diff --git a/unsupported/Eigen/AlignedVector3 b/unsupported/Eigen/AlignedVector3
index 7b45e6cce..1fce00525 100644
--- a/unsupported/Eigen/AlignedVector3
+++ b/unsupported/Eigen/AlignedVector3
@@ -57,6 +57,11 @@ template<typename _Scalar> class AlignedVector3
inline Index rows() const { return 3; }
inline Index cols() const { return 1; }
+
+ Scalar* data() { return m_coeffs.data(); }
+ const Scalar* data() const { return m_coeffs.data(); }
+ Index innerStride() const { return 1; }
+ Index outerStride() const { return m_coeffs.outerStride(); }
inline const Scalar& coeff(Index row, Index col) const
{ return m_coeffs.coeff(row, col); }
@@ -181,8 +186,28 @@ template<typename _Scalar> class AlignedVector3
{
return m_coeffs.template head<3>().isApprox(other,eps);
}
+
+ CoeffType& coeffs() { return m_coeffs; }
+ const CoeffType& coeffs() const { return m_coeffs; }
};
+namespace internal {
+
+template<typename Scalar>
+struct evaluator<AlignedVector3<Scalar> >
+ : evaluator<Matrix<Scalar,4,1> >::type
+{
+ typedef AlignedVector3<Scalar> XprType;
+ typedef typename evaluator<Matrix<Scalar,4,1> >::type Base;
+
+ typedef evaluator type;
+ typedef evaluator nestedType;
+
+ evaluator(const XprType &m) : Base(m.coeffs()) {}
+};
+
+}
+
//@}
}
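The evaluator specialization above plugs AlignedVector3 into the new evaluator-based expression path; basic usage of the class itself, for orientation (a sketch, assuming the unsupported AlignedVector3 module and its dot/cross/norm members):

#include <unsupported/Eigen/AlignedVector3>
#include <iostream>

int main()
{
  // behaves like Vector3f but is backed by 4 floats for vectorization
  Eigen::AlignedVector3<float> a(1.f, 2.f, 3.f), b(4.f, 5.f, 6.f);
  std::cout << a.dot(b) << " " << a.cross(b).norm() << "\n";
  return 0;
}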
diff --git a/unsupported/Eigen/BDCSVD b/unsupported/Eigen/BDCSVD
deleted file mode 100644
index 44649dbd0..000000000
--- a/unsupported/Eigen/BDCSVD
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef EIGEN_BDCSVD_MODULE_H
-#define EIGEN_BDCSVD_MODULE_H
-
-#include <Eigen/SVD>
-
-#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
-
-/** \defgroup BDCSVD_Module BDCSVD module
- *
- *
- *
- * This module provides Divide & Conquer SVD decomposition for matrices (both real and complex).
- * This decomposition is accessible via the following MatrixBase method:
- * - MatrixBase::bdcSvd()
- *
- * \code
- * #include <Eigen/BDCSVD>
- * \endcode
- */
-
-#include "src/BDCSVD/BDCSVD.h"
-
-#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_BDCSVD_MODULE_H
-/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
index 7e448f7c0..a96d705a4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
@@ -89,6 +89,9 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other)
{
typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
Assign assign(*this, other);
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index 022d20360..a844a4d68 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -157,50 +157,50 @@ struct eval<const TensorRef<PlainObjectType>, Eigen::Dense>
template <typename Scalar_, std::size_t NumIndices_, int Options_>
-struct nested<Tensor<Scalar_, NumIndices_, Options_>, 1, typename eval<Tensor<Scalar_, NumIndices_, Options_> >::type>
+struct nested<Tensor<Scalar_, NumIndices_, Options_> >
{
typedef const Tensor<Scalar_, NumIndices_, Options_>& type;
};
template <typename Scalar_, std::size_t NumIndices_, int Options_>
-struct nested<const Tensor<Scalar_, NumIndices_, Options_>, 1, typename eval<const Tensor<Scalar_, NumIndices_, Options_> >::type>
+struct nested<const Tensor<Scalar_, NumIndices_, Options_> >
{
typedef const Tensor<Scalar_, NumIndices_, Options_>& type;
};
template <typename Scalar_, typename Dimensions, int Options>
-struct nested<TensorFixedSize<Scalar_, Dimensions, Options>, 1, typename eval<TensorFixedSize<Scalar_, Dimensions, Options> >::type>
+struct nested<TensorFixedSize<Scalar_, Dimensions, Options> >
{
typedef const TensorFixedSize<Scalar_, Dimensions, Options>& type;
};
template <typename Scalar_, typename Dimensions, int Options>
-struct nested<const TensorFixedSize<Scalar_, Dimensions, Options>, 1, typename eval<const TensorFixedSize<Scalar_, Dimensions, Options> >::type>
+struct nested<const TensorFixedSize<Scalar_, Dimensions, Options> >
{
typedef const TensorFixedSize<Scalar_, Dimensions, Options>& type;
};
template <typename PlainObjectType, int Options>
-struct nested<TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type>
+struct nested<TensorMap<PlainObjectType, Options> >
{
typedef const TensorMap<PlainObjectType, Options>& type;
};
template <typename PlainObjectType, int Options>
-struct nested<const TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type>
+struct nested<const TensorMap<PlainObjectType, Options> >
{
typedef const TensorMap<PlainObjectType, Options>& type;
};
template <typename PlainObjectType>
-struct nested<TensorRef<PlainObjectType>, 1, typename eval<TensorRef<PlainObjectType> >::type>
+struct nested<TensorRef<PlainObjectType> >
{
typedef const TensorRef<PlainObjectType>& type;
};
template <typename PlainObjectType>
-struct nested<const TensorRef<PlainObjectType>, 1, typename eval<TensorRef<PlainObjectType> >::type>
+struct nested<const TensorRef<PlainObjectType> >
{
typedef const TensorRef<PlainObjectType>& type;
};
diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers
index aa15403db..ff0d59b6e 100644
--- a/unsupported/Eigen/IterativeSolvers
+++ b/unsupported/Eigen/IterativeSolvers
@@ -24,9 +24,6 @@
*/
//@{
-#include "../../Eigen/src/misc/Solve.h"
-#include "../../Eigen/src/misc/SparseSolve.h"
-
#ifndef EIGEN_MPL2_ONLY
#include "src/IterativeSolvers/IterationController.h"
#include "src/IterativeSolvers/ConstrainedConjGrad.h"
diff --git a/unsupported/Eigen/MPRealSupport b/unsupported/Eigen/MPRealSupport
index 632de3854..8e42965a3 100644
--- a/unsupported/Eigen/MPRealSupport
+++ b/unsupported/Eigen/MPRealSupport
@@ -159,10 +159,10 @@ int main()
{
if(rows==0 || cols==0 || depth==0)
return;
-
+
mpreal acc1(0,mpfr_get_prec(blockA[0].mpfr_srcptr())),
tmp (0,mpfr_get_prec(blockA[0].mpfr_srcptr()));
-
+
if(strideA==-1) strideA = depth;
if(strideB==-1) strideB = depth;
diff --git a/unsupported/Eigen/OpenGLSupport b/unsupported/Eigen/OpenGLSupport
index c4090ab11..6ca1b1217 100644
--- a/unsupported/Eigen/OpenGLSupport
+++ b/unsupported/Eigen/OpenGLSupport
@@ -51,7 +51,7 @@ namespace internal {
typename Scalar = typename XprType::Scalar, \
int Rows = XprType::RowsAtCompileTime, \
int Cols = XprType::ColsAtCompileTime, \
- bool IsGLCompatible = bool(XprType::Flags&LinearAccessBit) \
+ bool IsGLCompatible = bool(internal::evaluator<XprType>::Flags&LinearAccessBit) \
&& bool(XprType::Flags&DirectAccessBit) \
&& (XprType::IsVectorAtCompileTime || (XprType::Flags&RowMajorBit)==0)> \
struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl); \
@@ -178,11 +178,11 @@ template<typename Scalar> void glLoadMatrix(const Transform<Scalar,3,Affine>& t)
template<typename Scalar> void glLoadMatrix(const Transform<Scalar,3,Projective>& t) { glLoadMatrix(t.matrix()); }
template<typename Scalar> void glLoadMatrix(const Transform<Scalar,3,AffineCompact>& t) { glLoadMatrix(Transform<Scalar,3,Affine>(t).matrix()); }
-static void glRotate(const Rotation2D<float>& rot)
+inline void glRotate(const Rotation2D<float>& rot)
{
glRotatef(rot.angle()*180.f/float(M_PI), 0.f, 0.f, 1.f);
}
-static void glRotate(const Rotation2D<double>& rot)
+inline void glRotate(const Rotation2D<double>& rot)
{
glRotated(rot.angle()*180.0/M_PI, 0.0, 0.0, 1.0);
}
@@ -203,7 +203,7 @@ namespace internal {
typename Scalar = typename XprType::Scalar, \
int Rows = XprType::RowsAtCompileTime, \
int Cols = XprType::ColsAtCompileTime, \
- bool IsGLCompatible = bool(XprType::Flags&LinearAccessBit) \
+ bool IsGLCompatible = bool(internal::evaluator<XprType>::Flags&LinearAccessBit) \
&& bool(XprType::Flags&DirectAccessBit) \
&& (XprType::IsVectorAtCompileTime || (XprType::Flags&RowMajorBit)==0)> \
struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl); \
@@ -246,18 +246,18 @@ EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glGet,GLenum,_,double, 4,4,Doublev)
#ifdef GL_VERSION_2_0
-static void glUniform2fv_ei (GLint loc, const float* v) { glUniform2fv(loc,1,v); }
-static void glUniform2iv_ei (GLint loc, const int* v) { glUniform2iv(loc,1,v); }
+inline void glUniform2fv_ei (GLint loc, const float* v) { glUniform2fv(loc,1,v); }
+inline void glUniform2iv_ei (GLint loc, const int* v) { glUniform2iv(loc,1,v); }
-static void glUniform3fv_ei (GLint loc, const float* v) { glUniform3fv(loc,1,v); }
-static void glUniform3iv_ei (GLint loc, const int* v) { glUniform3iv(loc,1,v); }
+inline void glUniform3fv_ei (GLint loc, const float* v) { glUniform3fv(loc,1,v); }
+inline void glUniform3iv_ei (GLint loc, const int* v) { glUniform3iv(loc,1,v); }
-static void glUniform4fv_ei (GLint loc, const float* v) { glUniform4fv(loc,1,v); }
-static void glUniform4iv_ei (GLint loc, const int* v) { glUniform4iv(loc,1,v); }
+inline void glUniform4fv_ei (GLint loc, const float* v) { glUniform4fv(loc,1,v); }
+inline void glUniform4iv_ei (GLint loc, const int* v) { glUniform4iv(loc,1,v); }
-static void glUniformMatrix2fv_ei (GLint loc, const float* v) { glUniformMatrix2fv(loc,1,false,v); }
-static void glUniformMatrix3fv_ei (GLint loc, const float* v) { glUniformMatrix3fv(loc,1,false,v); }
-static void glUniformMatrix4fv_ei (GLint loc, const float* v) { glUniformMatrix4fv(loc,1,false,v); }
+inline void glUniformMatrix2fv_ei (GLint loc, const float* v) { glUniformMatrix2fv(loc,1,false,v); }
+inline void glUniformMatrix3fv_ei (GLint loc, const float* v) { glUniformMatrix3fv(loc,1,false,v); }
+inline void glUniformMatrix4fv_ei (GLint loc, const float* v) { glUniformMatrix4fv(loc,1,false,v); }
EIGEN_GL_FUNC1_DECLARATION (glUniform,GLint,const)
@@ -294,9 +294,9 @@ EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,3,Matrix
#ifdef GL_VERSION_3_0
-static void glUniform2uiv_ei (GLint loc, const unsigned int* v) { glUniform2uiv(loc,1,v); }
-static void glUniform3uiv_ei (GLint loc, const unsigned int* v) { glUniform3uiv(loc,1,v); }
-static void glUniform4uiv_ei (GLint loc, const unsigned int* v) { glUniform4uiv(loc,1,v); }
+inline void glUniform2uiv_ei (GLint loc, const unsigned int* v) { glUniform2uiv(loc,1,v); }
+inline void glUniform3uiv_ei (GLint loc, const unsigned int* v) { glUniform3uiv(loc,1,v); }
+inline void glUniform4uiv_ei (GLint loc, const unsigned int* v) { glUniform4uiv(loc,1,v); }
EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 2,2uiv_ei)
EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 3,3uiv_ei)
@@ -305,9 +305,9 @@ EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 4,4uiv_ei)
#endif
#ifdef GL_ARB_gpu_shader_fp64
-static void glUniform2dv_ei (GLint loc, const double* v) { glUniform2dv(loc,1,v); }
-static void glUniform3dv_ei (GLint loc, const double* v) { glUniform3dv(loc,1,v); }
-static void glUniform4dv_ei (GLint loc, const double* v) { glUniform4dv(loc,1,v); }
+inline void glUniform2dv_ei (GLint loc, const double* v) { glUniform2dv(loc,1,v); }
+inline void glUniform3dv_ei (GLint loc, const double* v) { glUniform3dv(loc,1,v); }
+inline void glUniform4dv_ei (GLint loc, const double* v) { glUniform4dv(loc,1,v); }
EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 2,2dv_ei)
EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 3,3dv_ei)
diff --git a/unsupported/Eigen/SparseExtra b/unsupported/Eigen/SparseExtra
index b5597902a..819cffa27 100644
--- a/unsupported/Eigen/SparseExtra
+++ b/unsupported/Eigen/SparseExtra
@@ -37,9 +37,6 @@
*/
-#include "../../Eigen/src/misc/Solve.h"
-#include "../../Eigen/src/misc/SparseSolve.h"
-
#include "src/SparseExtra/DynamicSparseMatrix.h"
#include "src/SparseExtra/BlockOfDynamicSparseMatrix.h"
#include "src/SparseExtra/RandomSetter.h"
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 590797973..8336c2644 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -593,7 +593,6 @@ inline const AutoDiffScalar<Matrix<typename internal::traits<DerTypeA>::Scalar,D
atan2(const AutoDiffScalar<DerTypeA>& a, const AutoDiffScalar<DerTypeB>& b)
{
using std::atan2;
- using std::max;
typedef typename internal::traits<DerTypeA>::Scalar Scalar;
typedef AutoDiffScalar<Matrix<Scalar,Dynamic,1> > PlainADS;
PlainADS ret;
diff --git a/unsupported/Eigen/src/BDCSVD/BDCSVD.h b/unsupported/Eigen/src/BDCSVD/BDCSVD.h
deleted file mode 100644
index a7c369633..000000000
--- a/unsupported/Eigen/src/BDCSVD/BDCSVD.h
+++ /dev/null
@@ -1,949 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// We used the "A Divide-And-Conquer Algorithm for the Bidiagonal SVD"
-// research report written by Ming Gu and Stanley C.Eisenstat
-// The code variable names correspond to the names they used in their
-// report
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
-//
-// Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_BDCSVD_H
-#define EIGEN_BDCSVD_H
-
-#define EPSILON 0.0000000000000001
-
-#define ALGOSWAP 16
-
-namespace Eigen {
-
-template<typename _MatrixType> class BDCSVD;
-
-namespace internal {
-
-template<typename _MatrixType>
-struct traits<BDCSVD<_MatrixType> >
-{
- typedef _MatrixType MatrixType;
-};
-
-} // end namespace internal
-
-
-/** \ingroup SVD_Module
- *
- *
- * \class BDCSVD
- *
- * \brief class Bidiagonal Divide and Conquer SVD
- *
- * \param MatrixType the type of the matrix of which we are computing the SVD decomposition
- * We plan to have a very similar interface to JacobiSVD on this class.
- * It should be used to speed up the calcul of SVD for big matrices.
- */
-template<typename _MatrixType>
-class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
-{
- typedef SVDBase<BDCSVD> Base;
-
-public:
- using Base::rows;
- using Base::cols;
-
- typedef _MatrixType MatrixType;
- typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
- typedef typename MatrixType::Index Index;
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime,
- DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime),
- MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
- MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
- MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime),
- MatrixOptions = MatrixType::Options
- };
-
- typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime,
- MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime>
- MatrixUType;
- typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime,
- MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime>
- MatrixVType;
- typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
- typedef typename internal::plain_row_type<MatrixType>::type RowType;
- typedef typename internal::plain_col_type<MatrixType>::type ColType;
- typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX;
- typedef Matrix<RealScalar, Dynamic, Dynamic> MatrixXr;
- typedef Matrix<RealScalar, Dynamic, 1> VectorType;
- typedef Array<RealScalar, Dynamic, 1> ArrayXr;
-
- /** \brief Default Constructor.
- *
- * The default constructor is useful in cases in which the user intends to
- * perform decompositions via BDCSVD::compute(const MatrixType&).
- */
- BDCSVD() : algoswap(ALGOSWAP), m_numIters(0)
- {}
-
-
- /** \brief Default Constructor with memory preallocation
- *
- * Like the default constructor but with preallocation of the internal data
- * according to the specified problem size.
- * \sa BDCSVD()
- */
- BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
- : algoswap(ALGOSWAP), m_numIters(0)
- {
- allocate(rows, cols, computationOptions);
- }
-
- /** \brief Constructor performing the decomposition of given matrix.
- *
- * \param matrix the matrix to decompose
- * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
- * By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU,
- * #ComputeFullV, #ComputeThinV.
- *
- * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
- * available with the (non - default) FullPivHouseholderQR preconditioner.
- */
- BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
- : algoswap(ALGOSWAP), m_numIters(0)
- {
- compute(matrix, computationOptions);
- }
-
- ~BDCSVD()
- {
- }
-
- /** \brief Method performing the decomposition of given matrix using custom options.
- *
- * \param matrix the matrix to decompose
- * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
- * By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU,
- * #ComputeFullV, #ComputeThinV.
- *
- * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
- * available with the (non - default) FullPivHouseholderQR preconditioner.
- */
- BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);
-
- /** \brief Method performing the decomposition of given matrix using current options.
- *
- * \param matrix the matrix to decompose
- *
- * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
- */
- BDCSVD& compute(const MatrixType& matrix)
- {
- return compute(matrix, this->m_computationOptions);
- }
-
- void setSwitchSize(int s)
- {
- eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3");
- algoswap = s;
- }
-
-
- /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
- *
- * \param b the right - hand - side of the equation to solve.
- *
- * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
- *
- * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
- * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
- */
- template<typename Rhs>
- inline const internal::solve_retval<BDCSVD, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(this->m_isInitialized && "BDCSVD is not initialized.");
- eigen_assert(computeU() && computeV() &&
- "BDCSVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
- return internal::solve_retval<BDCSVD, Rhs>(*this, b.derived());
- }
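-
- /* A least-squares sketch; the sizes are illustrative, and thin unitaries
-  * suffice as noted above:
-  * \code
-  * MatrixXf A = MatrixXf::Random(10,4);
-  * VectorXf b = VectorXf::Random(10);
-  * VectorXf x = BDCSVD<MatrixXf>(A, ComputeThinU | ComputeThinV).solve(b);
-  * // x minimizes the Euclidean norm |A x - b|
-  * \endcode
-  */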
-
-
- const MatrixUType& matrixU() const
- {
- eigen_assert(this->m_isInitialized && "SVD is not initialized.");
- if (isTranspose){
- eigen_assert(this->computeV() && "This SVD decomposition didn't compute U. Did you ask for it?");
- return this->m_matrixV;
- }
- else
- {
- eigen_assert(this->computeU() && "This SVD decomposition didn't compute U. Did you ask for it?");
- return this->m_matrixU;
- }
-
- }
-
-
- const MatrixVType& matrixV() const
- {
- eigen_assert(this->m_isInitialized && "SVD is not initialized.");
- if (isTranspose){
- eigen_assert(this->computeU() && "This SVD decomposition didn't compute V. Did you ask for it?");
- return this->m_matrixU;
- }
- else
- {
- eigen_assert(this->computeV() && "This SVD decomposition didn't compute V. Did you ask for it?");
- return this->m_matrixV;
- }
- }
-
- using Base::computeU;
- using Base::computeV;
-
-private:
- void allocate(Index rows, Index cols, unsigned int computationOptions);
- void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
- void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
- void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, VectorType& singVals,
- ArrayXr& shifts, ArrayXr& mus);
- void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const VectorType& singVals,
- const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat);
- void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const VectorType& singVals,
- const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V);
- void deflation43(Index firstCol, Index shift, Index i, Index size);
- void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
- void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
- void copyUV(const typename internal::UpperBidiagonalization<MatrixX>::HouseholderUSequenceType& householderU,
- const typename internal::UpperBidiagonalization<MatrixX>::HouseholderVSequenceType& householderV);
-
-protected:
- MatrixXr m_naiveU, m_naiveV;
- MatrixXr m_computed;
- Index nRec;
- int algoswap;
- bool isTranspose, compU, compV;
-
-public:
- int m_numIters;
-}; //end class BDCSVD
-
-
-// Method to allocate and initialize the matrices and attributes
-template<typename MatrixType>
-void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
-{
- isTranspose = (cols > rows);
- if (Base::allocate(rows, cols, computationOptions)) return;
- m_computed = MatrixXr::Zero(this->m_diagSize + 1, this->m_diagSize );
- if (isTranspose){
- compU = this->computeU();
- compV = this->computeV();
- }
- else
- {
- compV = this->computeU();
- compU = this->computeV();
- }
- if (compU) m_naiveU = MatrixXr::Zero(this->m_diagSize + 1, this->m_diagSize + 1 );
- else m_naiveU = MatrixXr::Zero(2, this->m_diagSize + 1 );
-
- if (compV) m_naiveV = MatrixXr::Zero(this->m_diagSize, this->m_diagSize);
-
-
- // this swap should be replaced by a cleaner implementation
- if (isTranspose){
- bool aux;
- if (this->computeU()||this->computeV()){
- aux = this->m_computeFullU;
- this->m_computeFullU = this->m_computeFullV;
- this->m_computeFullV = aux;
- aux = this->m_computeThinU;
- this->m_computeThinU = this->m_computeThinV;
- this->m_computeThinV = aux;
- }
- }
-}// end allocate
-
-// Specialization for integer matrices: no actual SVD is computed, all outputs are filled with zeros
-template<>
-BDCSVD<Matrix<int, Dynamic, Dynamic> >& BDCSVD<Matrix<int, Dynamic, Dynamic> >::compute(const MatrixType& matrix, unsigned int computationOptions) {
- allocate(matrix.rows(), matrix.cols(), computationOptions);
- this->m_nonzeroSingularValues = 0;
- m_computed = Matrix<int, Dynamic, Dynamic>::Zero(rows(), cols());
- for (int i=0; i<this->m_diagSize; i++) {
- this->m_singularValues.coeffRef(i) = 0;
- }
- if (this->m_computeFullU) this->m_matrixU = Matrix<int, Dynamic, Dynamic>::Zero(rows(), rows());
- if (this->m_computeFullV) this->m_matrixV = Matrix<int, Dynamic, Dynamic>::Zero(cols(), cols());
- this->m_isInitialized = true;
- return *this;
-}
-
-
-// Method which computes the BDCSVD
-template<typename MatrixType>
-BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions)
-{
- allocate(matrix.rows(), matrix.cols(), computationOptions);
- using std::abs;
-
- //**** step 1 - Bidiagonalization
- MatrixType copy;
- if (isTranspose) copy = matrix.adjoint();
- else copy = matrix;
-
- internal::UpperBidiagonalization<MatrixX> bid(copy);
-
- //**** step 2 Divide
- m_computed.topRows(this->m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
- m_computed.template bottomRows<1>().setZero();
- divide(0, this->m_diagSize - 1, 0, 0, 0);
-
- //**** step 3 copy
- for (int i=0; i<this->m_diagSize; i++) {
- RealScalar a = abs(m_computed.coeff(i, i));
- this->m_singularValues.coeffRef(i) = a;
- if (a == 0){
- this->m_nonzeroSingularValues = i;
- this->m_singularValues.tail(this->m_diagSize - i - 1).setZero();
- break;
- }
- else if (i == this->m_diagSize - 1)
- {
- this->m_nonzeroSingularValues = i + 1;
- break;
- }
- }
- copyUV(bid.householderU(), bid.householderV());
- this->m_isInitialized = true;
- return *this;
-}// end compute
-
-
-template<typename MatrixType>
-void BDCSVD<MatrixType>::copyUV(const typename internal::UpperBidiagonalization<MatrixX>::HouseholderUSequenceType& householderU,
- const typename internal::UpperBidiagonalization<MatrixX>::HouseholderVSequenceType& householderV)
-{
- // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
- if (this->computeU()){
- Index Ucols = this->m_computeThinU ? this->m_nonzeroSingularValues : householderU.cols();
- this->m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
- Index blockCols = this->m_computeThinU ? this->m_nonzeroSingularValues : this->m_diagSize;
- this->m_matrixU.block(0, 0, this->m_diagSize, blockCols) =
- m_naiveV.template cast<Scalar>().block(0, 0, this->m_diagSize, blockCols);
- this->m_matrixU = householderU * this->m_matrixU;
- }
- if (this->computeV()){
- Index Vcols = this->m_computeThinV ? this->m_nonzeroSingularValues : householderV.cols();
- this->m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
- Index blockCols = this->m_computeThinV ? this->m_nonzeroSingularValues : this->m_diagSize;
- this->m_matrixV.block(0, 0, this->m_diagSize, blockCols) =
- m_naiveU.template cast<Scalar>().block(0, 0, this->m_diagSize, blockCols);
- this->m_matrixV = householderV * this->m_matrixV;
- }
-}
-
-// The divide algorithm is done "in place": we are always working on subsets of the same matrix. The divide method takes as arguments the
-// position of the submatrix we are currently working on.
-
-//@param firstCol : The Index of the first column of the submatrix of m_computed and of m_naiveU;
-//@param lastCol : The Index of the last column of the submatrix of m_computed and of m_naiveU;
-// lastCol + 1 - firstCol is the size of the submatrix.
-//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper, section 1, for more information on W)
-//@param firstColW : Same as firstRowW, but for the column.
-//@param shift : Each time one takes the left submatrix, one must add 1 to the shift, because we want the last column of the U submatrix
-// to become the first column (*coeff) and all the other columns to shift one place to the right. More details can be found in the reference paper.
-template<typename MatrixType>
-void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
- Index firstColW, Index shift)
-{
- // requires nbRows = nbCols + 1;
- using std::pow;
- using std::sqrt;
- using std::abs;
- const Index n = lastCol - firstCol + 1;
- const Index k = n/2;
- RealScalar alphaK;
- RealScalar betaK;
- RealScalar r0;
- RealScalar lambda, phi, c0, s0;
- MatrixXr l, f;
- // For small blocks we fall back on JacobiSVD, which is more efficient than
- // recursing further.
- if (n < algoswap){
- JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n),
- ComputeFullU | (ComputeFullV * compV)) ;
- if (compU) m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() << b.matrixU();
- else
- {
- m_naiveU.row(0).segment(firstCol, n + 1).real() << b.matrixU().row(0);
- m_naiveU.row(1).segment(firstCol, n + 1).real() << b.matrixU().row(n);
- }
- if (compV) m_naiveV.block(firstRowW, firstColW, n, n).real() << b.matrixV();
- m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
- for (int i=0; i<n; i++)
- {
- m_computed(firstCol + shift + i, firstCol + shift +i) = b.singularValues().coeffRef(i);
- }
- return;
- }
- // We use the divide and conquer algorithm
- alphaK = m_computed(firstCol + k, firstCol + k);
- betaK = m_computed(firstCol + k + 1, firstCol + k);
- // The divides must be done in this order to get correct results: divide() changes the data inside the submatrices,
- // and the divide of the right submatrix reads one column of the left submatrix. That's why we need to treat the
- // right submatrix before the left one.
- divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
- divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);
- if (compU)
- {
- lambda = m_naiveU(firstCol + k, firstCol + k);
- phi = m_naiveU(firstCol + k + 1, lastCol + 1);
- }
- else
- {
- lambda = m_naiveU(1, firstCol + k);
- phi = m_naiveU(0, lastCol + 1);
- }
- r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda))
- + abs(betaK * phi) * abs(betaK * phi));
- if (compU)
- {
- l = m_naiveU.row(firstCol + k).segment(firstCol, k);
- f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
- }
- else
- {
- l = m_naiveU.row(1).segment(firstCol, k);
- f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
- }
- if (compV) m_naiveV(firstRowW+k, firstColW) = 1;
- if (r0 == 0)
- {
- c0 = 1;
- s0 = 0;
- }
- else
- {
- c0 = alphaK * lambda / r0;
- s0 = betaK * phi / r0;
- }
- if (compU)
- {
- MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));
- // we shift Q1 to the right
- for (Index i = firstCol + k - 1; i >= firstCol; i--)
- {
- m_naiveU.col(i + 1).segment(firstCol, k + 1) << m_naiveU.col(i).segment(firstCol, k + 1);
- }
- // first column = q1 * c0
- m_naiveU.col(firstCol).segment( firstCol, k + 1) << (q1 * c0);
- // last column = q1 * - s0
- m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) << (q1 * ( - s0));
- // first column = q2 * s0
- m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) <<
- m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *s0;
- // q2 *= c0
- m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
- }
- else
- {
- RealScalar q1 = (m_naiveU(0, firstCol + k));
- // we shift Q1 to the right
- for (Index i = firstCol + k - 1; i >= firstCol; i--)
- {
- m_naiveU(0, i + 1) = m_naiveU(0, i);
- }
- // first column = q1 * c0
- m_naiveU(0, firstCol) = (q1 * c0);
- // last column = q1 * - s0
- m_naiveU(0, lastCol + 1) = (q1 * ( - s0));
- // first column = q2 * s0
- m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0;
- // q2 *= c0
- m_naiveU(1, lastCol + 1) *= c0;
- m_naiveU.row(1).segment(firstCol + 1, k).setZero();
- m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
- }
- m_computed(firstCol + shift, firstCol + shift) = r0;
- m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) << alphaK * l.transpose().real();
- m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) << betaK * f.transpose().real();
-
-
- // Second part: try to deflate singular values in combined matrix
- deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
-
- // Third part: compute SVD of combined matrix
- MatrixXr UofSVD, VofSVD;
- VectorType singVals;
- computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);
- if (compU) m_naiveU.block(firstCol, firstCol, n + 1, n + 1) *= UofSVD;
- else m_naiveU.block(0, firstCol, 2, n + 1) *= UofSVD;
- if (compV) m_naiveV.block(firstRowW, firstColW, n, n) *= VofSVD;
- m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
- m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
-}// end divide
-
-// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
-// the first column and on the diagonal and has undergone deflation, so the diagonal is in increasing
-// order except for possibly the (0,0) entry. The computed SVD is stored in U, singVals and V, except
-// that if compV is false, then V is not computed. Singular values are sorted in decreasing order.
-//
-// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
-// handling of round-off errors, be consistent in ordering
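-//
-// Sketch of the deflated block (z = col0, d = diag, blanks are zeros; the bottom
-// row of the (n+1) x n block is zero):
-//   [ z0             ]
-//   [ z1  d1         ]
-//   [ z2      d2     ]
-//   [ z3          d3 ]
-//   [                ]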
-template <typename MatrixType>
-void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
-{
- // TODO Get rid of these copies (?)
- ArrayXr col0 = m_computed.block(firstCol, firstCol, n, 1);
- ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal();
- diag(0) = 0;
-
- // compute singular values and vectors (in increasing order; reversed below)
- singVals.resize(n);
- U.resize(n+1, n+1);
- if (compV) V.resize(n, n);
-
- if (col0.hasNaN() || diag.hasNaN()) return;
-
- ArrayXr shifts(n), mus(n), zhat(n);
- computeSingVals(col0, diag, singVals, shifts, mus);
- perturbCol0(col0, diag, singVals, shifts, mus, zhat);
- computeSingVecs(zhat, diag, singVals, shifts, mus, U, V);
-
- // Reverse the order: computeSingVals produces the singular values in increasing order, but they must be returned in decreasing order
- singVals.reverseInPlace();
- U.leftCols(n) = U.leftCols(n).rowwise().reverse().eval();
- if (compV) V = V.rowwise().reverse().eval();
-}
-
-template <typename MatrixType>
-void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag,
- VectorType& singVals, ArrayXr& shifts, ArrayXr& mus)
-{
- using std::abs;
- using std::swap;
-
- Index n = col0.size();
- for (Index k = 0; k < n; ++k) {
- if (col0(k) == 0) {
- // entry is deflated, so singular value is on diagonal
- singVals(k) = diag(k);
- mus(k) = 0;
- shifts(k) = diag(k);
- continue;
- }
-
- // otherwise, use secular equation to find singular value
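- // f(x) = 1 + sum_i col0(i)^2 / ((diag(i) - x) * (diag(i) + x)); the k-th singular
- // value is the root of f lying between diag(k) and diag(k+1)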
- RealScalar left = diag(k);
- RealScalar right = (k != n-1) ? diag(k+1) : (diag(n-1) + col0.matrix().norm());
-
- // first decide whether it's closer to the left end or the right end
- RealScalar mid = left + (right-left) / 2;
- RealScalar fMid = 1 + (col0.square() / ((diag + mid) * (diag - mid))).sum();
-
- RealScalar shift;
- if (k == n-1 || fMid > 0) shift = left;
- else shift = right;
-
- // measure everything relative to shift
- ArrayXr diagShifted = diag - shift;
-
- // initial guess
- RealScalar muPrev, muCur;
- if (shift == left) {
- muPrev = (right - left) * 0.1;
- if (k == n-1) muCur = right - left;
- else muCur = (right - left) * 0.5;
- } else {
- muPrev = -(right - left) * 0.1;
- muCur = -(right - left) * 0.5;
- }
-
- RealScalar fPrev = 1 + (col0.square() / ((diagShifted - muPrev) * (diag + shift + muPrev))).sum();
- RealScalar fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum();
- if (abs(fPrev) < abs(fCur)) {
- swap(fPrev, fCur);
- swap(muPrev, muCur);
- }
-
- // rational interpolation: fit a function of the form a / mu + b through the two previous
- // iterates and use its zero to compute the next iterate
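- // Given the iterates (muPrev, fPrev) and (muCur, fCur), the fit yields
- //   a = (fCur - fPrev) / (1/muCur - 1/muPrev) and b = fCur - a/muCur,
- // and the zero of a/mu + b gives the next iterate muCur = -a/b (see the loop below).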
- bool useBisection = false;
- while (abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(muCur), abs(muPrev)) && fCur != fPrev && !useBisection) {
- ++m_numIters;
-
- RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev);
- RealScalar b = fCur - a / muCur;
-
- muPrev = muCur;
- fPrev = fCur;
- muCur = -a / b;
- fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum();
-
- if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true;
- if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true;
- }
-
- // fall back on bisection method if rational interpolation did not work
- if (useBisection) {
- RealScalar leftShifted, rightShifted;
- if (shift == left) {
- leftShifted = 1e-30;
- if (k == 0) rightShifted = right - left;
- else rightShifted = (right - left) * 0.6; // theoretically we can take 0.5, but let's be safe
- } else {
- leftShifted = -(right - left) * 0.6;
- rightShifted = -1e-30;
- }
-
- RealScalar fLeft = 1 + (col0.square() / ((diagShifted - leftShifted) * (diag + shift + leftShifted))).sum();
- RealScalar fRight = 1 + (col0.square() / ((diagShifted - rightShifted) * (diag + shift + rightShifted))).sum();
- assert(fLeft * fRight < 0);
-
- while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(leftShifted), abs(rightShifted))) {
- RealScalar midShifted = (leftShifted + rightShifted) / 2;
- RealScalar fMid = 1 + (col0.square() / ((diagShifted - midShifted) * (diag + shift + midShifted))).sum();
- if (fLeft * fMid < 0) {
- rightShifted = midShifted;
- fRight = fMid;
- } else {
- leftShifted = midShifted;
- fLeft = fMid;
- }
- }
-
- muCur = (leftShifted + rightShifted) / 2;
- }
-
- singVals[k] = shift + muCur;
- shifts[k] = shift;
- mus[k] = muCur;
-
- // perturb the singular value slightly if it equals a diagonal entry, to avoid a division by zero later
- // (deflation is supposed to prevent this from happening)
- if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();
- if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();
- }
-}
-
-
-// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
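-// In factored form, with sigma_i = shifts(i) + mus(i), equation (3.6) reads
-//   zhat_k^2 = (sigma_{n-1}^2 - d_k^2) * prod_{i<k} (sigma_i^2 - d_k^2) / (d_i^2 - d_k^2)
-//                                      * prod_{k<=i<n-1} (sigma_i^2 - d_k^2) / (d_{i+1}^2 - d_k^2);
-// each factor is evaluated below as (sigma + d_k) * (mu + (shift - d_k)) to limit cancellation.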
-template <typename MatrixType>
-void BDCSVD<MatrixType>::perturbCol0
- (const ArrayXr& col0, const ArrayXr& diag, const VectorType& singVals,
- const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat)
-{
- Index n = col0.size();
- for (Index k = 0; k < n; ++k) {
- if (col0(k) == 0)
- zhat(k) = 0;
- else {
- // see equation (3.6)
- using std::sqrt;
- RealScalar tmp =
- sqrt(
- (singVals(n-1) + diag(k)) * (mus(n-1) + (shifts(n-1) - diag(k)))
- * (
- ((singVals.head(k).array() + diag(k)) * (mus.head(k) + (shifts.head(k) - diag(k))))
- / ((diag.head(k).array() + diag(k)) * (diag.head(k).array() - diag(k)))
- ).prod()
- * (
- ((singVals.segment(k, n-k-1).array() + diag(k)) * (mus.segment(k, n-k-1) + (shifts.segment(k, n-k-1) - diag(k))))
- / ((diag.tail(n-k-1) + diag(k)) * (diag.tail(n-k-1) - diag(k)))
- ).prod()
- );
- if (col0(k) > 0) zhat(k) = tmp;
- else zhat(k) = -tmp;
- }
- }
-}
-
-// compute singular vectors
-template <typename MatrixType>
-void BDCSVD<MatrixType>::computeSingVecs
- (const ArrayXr& zhat, const ArrayXr& diag, const VectorType& singVals,
- const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V)
-{
- Index n = zhat.size();
- for (Index k = 0; k < n; ++k) {
- if (zhat(k) == 0) {
- U.col(k) = VectorType::Unit(n+1, k);
- if (compV) V.col(k) = VectorType::Unit(n, k);
- } else {
- U.col(k).head(n) = zhat / (((diag - shifts(k)) - mus(k)) * (diag + singVals[k]));
- U(n,k) = 0;
- U.col(k).normalize();
-
- if (compV) {
- V.col(k).tail(n-1) = (diag * zhat / (((diag - shifts(k)) - mus(k)) * (diag + singVals[k]))).tail(n-1);
- V(0,k) = -1;
- V.col(k).normalize();
- }
- }
- }
- U.col(n) = VectorType::Unit(n+1, n);
-}
-
-
-// pages 12-13 of the reference paper
-// i >= 1, d_i almost null and z_i non null.
-// We apply a rotation on the left of M to zero out z_i
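-// The rotation is built from c = d_0/r and s = z_i/r with r = sqrt(d_0^2 + z_i^2), where
-// d_0 is the top-left entry; it maps the pair (d_0, z_i) to (r, 0) and is accumulated
-// into m_naiveU when compU is set.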
-template <typename MatrixType>
-void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size){
- using std::abs;
- using std::sqrt;
- using std::pow;
- RealScalar c = m_computed(firstCol + shift, firstCol + shift);
- RealScalar s = m_computed(i, firstCol + shift);
- RealScalar r = sqrt(pow(abs(c), 2) + pow(abs(s), 2));
- if (r == 0){
- m_computed(i, i)=0;
- return;
- }
- c/=r;
- s/=r;
- m_computed(firstCol + shift, firstCol + shift) = r;
- m_computed(i, firstCol + shift) = 0;
- m_computed(i, i) = 0;
- if (compU){
- m_naiveU.col(firstCol).segment(firstCol,size) =
- c * m_naiveU.col(firstCol).segment(firstCol, size) -
- s * m_naiveU.col(i).segment(firstCol, size) ;
-
- m_naiveU.col(i).segment(firstCol, size) =
- (c + s*s/c) * m_naiveU.col(i).segment(firstCol, size) +
- (s/c) * m_naiveU.col(firstCol).segment(firstCol,size);
- }
-}// end deflation 43
-
-
-// page 13
-// i, j >= 1, i != j and |d_i - d_j| < epsilon * norm2(M)
-// We apply two rotations so that z_j = 0.
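-// The rotation combines the two first-column entries z_i and z_j into r = sqrt(z_i^2 + z_j^2),
-// leaving r at position i and a zero at position j; it is accumulated into the U factor
-// (and into V when compV is set).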
-template <typename MatrixType>
-void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size){
- using std::abs;
- using std::sqrt;
- using std::conj;
- using std::pow;
- RealScalar c = m_computed(firstColm, firstColm + j - 1);
- RealScalar s = m_computed(firstColm, firstColm + i - 1);
- RealScalar r = sqrt(pow(abs(c), 2) + pow(abs(s), 2));
- if (r==0){
- m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
- return;
- }
- c/=r;
- s/=r;
- m_computed(firstColm + i, firstColm) = r;
- m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
- m_computed(firstColm + j, firstColm) = 0;
- if (compU){
- m_naiveU.col(firstColu + i).segment(firstColu, size) =
- c * m_naiveU.col(firstColu + i).segment(firstColu, size) -
- s * m_naiveU.col(firstColu + j).segment(firstColu, size) ;
-
- m_naiveU.col(firstColu + j).segment(firstColu, size) =
- (c + s*s/c) * m_naiveU.col(firstColu + j).segment(firstColu, size) +
- (s/c) * m_naiveU.col(firstColu + i).segment(firstColu, size);
- }
- if (compV){
- m_naiveV.col(firstColW + i).segment(firstRowW, size - 1) =
- c * m_naiveV.col(firstColW + i).segment(firstRowW, size - 1) +
- s * m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) ;
-
- m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) =
- (c + s*s/c) * m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) -
- (s/c) * m_naiveV.col(firstColW + i).segment(firstRowW, size - 1);
- }
-}// end deflation 44
-
-
-// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
-template <typename MatrixType>
-void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift){
- //condition 4.1
- using std::sqrt;
- const Index length = lastCol + 1 - firstCol;
- RealScalar norm1 = m_computed.block(firstCol+shift, firstCol+shift, length, 1).squaredNorm();
- RealScalar norm2 = m_computed.block(firstCol+shift, firstCol+shift, length, length).diagonal().squaredNorm();
- RealScalar EPS = 10 * NumTraits<RealScalar>::epsilon() * sqrt(norm1 + norm2);
- if (m_computed(firstCol + shift, firstCol + shift) < EPS){
- m_computed(firstCol + shift, firstCol + shift) = EPS;
- }
-
- //condition 4.2
- for (Index i=firstCol + shift + 1;i<=lastCol + shift;i++){
- if (std::abs(m_computed(i, firstCol + shift)) < EPS){
- m_computed(i, firstCol + shift) = 0;
- }
- }
-
- //condition 4.3
- for (Index i=firstCol + shift + 1;i<=lastCol + shift; i++){
- if (m_computed(i, i) < EPS){
- deflation43(firstCol, shift, i, length);
- }
- }
-
- //condition 4.4
-
- Index i=firstCol + shift + 1, j=firstCol + shift + k + 1;
- // we store the final position of each row
- Index *permutation = new Index[length];
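- // The two halves produced by divide() each carry a sorted diagonal; the loop below
- // merges them into `permutation` with the largest entries first, and the reordering
- // loop further down applies the permutation back to front.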
-
- for (Index p =1; p < length; p++) {
- if (i> firstCol + shift + k){
- permutation[p] = j;
- j++;
- } else if (j> lastCol + shift)
- {
- permutation[p] = i;
- i++;
- }
- else
- {
- if (m_computed(i, i) < m_computed(j, j)){
- permutation[p] = j;
- j++;
- }
- else
- {
- permutation[p] = i;
- i++;
- }
- }
- }
- // we apply the permutation
- RealScalar aux;
- // we store the current index of each column
- // and the column of each index
- Index *realInd = new Index[length];
- Index *realCol = new Index[length];
- for (int pos = 0; pos< length; pos++){
- realCol[pos] = pos + firstCol + shift;
- realInd[pos] = pos;
- }
- const Index Zero = firstCol + shift;
- VectorType temp;
- for (int i = 1; i < length - 1; i++){
- const Index I = i + Zero;
- const Index realI = realInd[i];
- const Index j = permutation[length - i] - Zero;
- const Index J = realCol[j];
-
- //diag displace
- aux = m_computed(I, I);
- m_computed(I, I) = m_computed(J, J);
- m_computed(J, J) = aux;
-
- //firstrow displace
- aux = m_computed(I, Zero);
- m_computed(I, Zero) = m_computed(J, Zero);
- m_computed(J, Zero) = aux;
-
- // change columns
- if (compU) {
- temp = m_naiveU.col(I - shift).segment(firstCol, length + 1);
- m_naiveU.col(I - shift).segment(firstCol, length + 1) <<
- m_naiveU.col(J - shift).segment(firstCol, length + 1);
- m_naiveU.col(J - shift).segment(firstCol, length + 1) << temp;
- }
- else
- {
- temp = m_naiveU.col(I - shift).segment(0, 2);
- m_naiveU.col(I - shift).segment(0, 2) <<
- m_naiveU.col(J - shift).segment(0, 2);
- m_naiveU.col(J - shift).segment(0, 2) << temp;
- }
- if (compV) {
- const Index CWI = I + firstColW - Zero;
- const Index CWJ = J + firstColW - Zero;
- temp = m_naiveV.col(CWI).segment(firstRowW, length);
- m_naiveV.col(CWI).segment(firstRowW, length) << m_naiveV.col(CWJ).segment(firstRowW, length);
- m_naiveV.col(CWJ).segment(firstRowW, length) << temp;
- }
-
- //update real pos
- realCol[realI] = J;
- realCol[j] = I;
- realInd[J - Zero] = realI;
- realInd[I - Zero] = j;
- }
- for (Index i = firstCol + shift + 1; i<lastCol + shift;i++){
- if ((m_computed(i + 1, i + 1) - m_computed(i, i)) < EPS){
- deflation44(firstCol ,
- firstCol + shift,
- firstRowW,
- firstColW,
- i - Zero,
- i + 1 - Zero,
- length);
- }
- }
- delete [] permutation;
- delete [] realInd;
- delete [] realCol;
-}//end deflation
-
-
-namespace internal{
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<BDCSVD<_MatrixType>, Rhs>
- : solve_retval_base<BDCSVD<_MatrixType>, Rhs>
-{
- typedef BDCSVD<_MatrixType> BDCSVDType;
- EIGEN_MAKE_SOLVE_HELPERS(BDCSVDType, Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- eigen_assert(rhs().rows() == dec().rows());
- // A = U S V^*
- // so A^{-1} = V S^{-1} U^*
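- // Only the nonzero singular values are inverted and the remaining ones are zeroed,
- // so this applies the Moore-Penrose pseudo-inverse of A to the right-hand side.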
- Index diagSize = (std::min)(dec().rows(), dec().cols());
- typename BDCSVDType::SingularValuesType invertedSingVals(diagSize);
- Index nonzeroSingVals = dec().nonzeroSingularValues();
- invertedSingVals.head(nonzeroSingVals) = dec().singularValues().head(nonzeroSingVals).array().inverse();
- invertedSingVals.tail(diagSize - nonzeroSingVals).setZero();
-
- dst = dec().matrixV().leftCols(diagSize)
- * invertedSingVals.asDiagonal()
- * dec().matrixU().leftCols(diagSize).adjoint()
- * rhs();
- return;
- }
-};
-
-} //end namespace internal
-
- /** \svd_module
- *
- * \returns the singular value decomposition of \c *this computed by
- * the BDC (bidiagonal divide-and-conquer) algorithm
- *
- * \sa class BDCSVD
- */
-/*
-template<typename Derived>
-BDCSVD<typename MatrixBase<Derived>::PlainObject>
-MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const
-{
- return BDCSVD<PlainObject>(*this, computationOptions);
-}
-*/
-
-} // end namespace Eigen
-
-#endif
diff --git a/unsupported/Eigen/src/BDCSVD/CMakeLists.txt b/unsupported/Eigen/src/BDCSVD/CMakeLists.txt
deleted file mode 100644
index 73b89ea18..000000000
--- a/unsupported/Eigen/src/BDCSVD/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_BDCSVD_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_BDCSVD_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}unsupported/Eigen/src/BDCSVD COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/BDCSVD/TODOBdcsvd.txt b/unsupported/Eigen/src/BDCSVD/TODOBdcsvd.txt
deleted file mode 100644
index 0bc9a46e6..000000000
--- a/unsupported/Eigen/src/BDCSVD/TODOBdcsvd.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-TO DO LIST
-
-
-
-(optional optimization) - do all the allocations in the allocate part
- - support static matrices
- - return an error at compile time when using integer matrices (int, long, std::complex<int>, ...)
-
-to finish the algorithm:
- - implement the last part of the algorithm as described in the reference paper.
- You may find more information on that part in this paper
-
- - replace the call to JacobiSVD at the end of the divide algorithm, just after the call to
- deflation.
-
-(suggested step by step resolution)
- 0) comment out the call to JacobiSVD in the last part of the divide method and everything right after,
- until the end of the method. What is commented out can serve as a guideline for steps 3), 4) and 6)
- 1) solve the secular equation (characteristic equation) on the values that are not null (zi != 0 and di != 0), after the deflation,
- which should be uncommented in the divide method
- 2) keep the values of the singular values that are already known (zi = 0)
- 3) place the singular values found into m_computed at the right positions (together with the ones found in step 2))
- in decreasing order
- 4) set the first column to zero (except its first element) in m_computed
- 5) compute all the singular vectors when CompV is set to true, and only the left vectors when
- CompV is set to false
- 6) multiply naiveU and naiveV on the right by the matrices found (only naiveU when CompV is set to
- false); beware: if CompU is false, naiveU has only 2 rows
- 7) delete everything commented out in step 0)
diff --git a/unsupported/Eigen/src/BDCSVD/doneInBDCSVD.txt b/unsupported/Eigen/src/BDCSVD/doneInBDCSVD.txt
deleted file mode 100644
index 8563ddab8..000000000
--- a/unsupported/Eigen/src/BDCSVD/doneInBDCSVD.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-This unsupported package implements a divide and conquer algorithm to compute the SVD.
-
-The implementation follows as closely as possible the following reference paper:
-http://www.cs.yale.edu/publications/techreports/tr933.pdf
-
-The code documentation uses the same names for variables as the reference paper. The code, deflation included, is
-working, but there are a few things that could be optimised, as explained in TODOBdcsvd.txt.
-
-In the code, comments were put at the line where the third step of the algorithm would go, so one could simply add the call
-to a function doing the last part of the algorithm without needing any knowledge of the part we implemented.
-
-In TODOBdcsvd.txt we explain the main difficulty of the last part and suggest a reference paper to help solve it.
-
-The implementation has trouble with fixed-size matrices.
-
-In the current implementation, it returns zero matrices when asked to compute the SVD of an integer matrix.
-
-
-Paper for the third part:
-http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
-
diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt
index 654a2327f..8eb2808e3 100644
--- a/unsupported/Eigen/src/CMakeLists.txt
+++ b/unsupported/Eigen/src/CMakeLists.txt
@@ -12,4 +12,3 @@ ADD_SUBDIRECTORY(Skyline)
ADD_SUBDIRECTORY(SparseExtra)
ADD_SUBDIRECTORY(KroneckerProduct)
ADD_SUBDIRECTORY(Splines)
-ADD_SUBDIRECTORY(BDCSVD)
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index 9fcc8a8d9..0e1b7d977 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -108,6 +108,7 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
using Base::m_isInitialized;
using Base::m_tolerance;
public:
+ using Base::_solve_impl;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -138,25 +139,9 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
~DGMRES() {}
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
- * \a x0 as an initial solution.
- *
- * \sa compute()
- */
- template<typename Rhs,typename Guess>
- inline const internal::solve_retval_with_guess<DGMRES, Rhs, Guess>
- solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
- {
- eigen_assert(m_isInitialized && "DGMRES is not initialized.");
- eigen_assert(Base::rows()==b.rows()
- && "DGMRES::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval_with_guess
- <DGMRES, Rhs, Guess>(*this, b.derived(), x0);
- }
-
/** \internal */
template<typename Rhs,typename Dest>
- void _solveWithGuess(const Rhs& b, Dest& x) const
+ void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
bool failed = false;
for(int j=0; j<b.cols(); ++j)
@@ -175,10 +160,10 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, MatrixBase<Dest>& x) const
{
x = b;
- _solveWithGuess(b,x);
+ _solve_with_guess_impl(b,x.derived());
}
/**
* Get the restart value
@@ -522,21 +507,5 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x,
return 0;
}
-namespace internal {
-
- template<typename _MatrixType, typename _Preconditioner, typename Rhs>
-struct solve_retval<DGMRES<_MatrixType, _Preconditioner>, Rhs>
- : solve_retval_base<DGMRES<_MatrixType, _Preconditioner>, Rhs>
-{
- typedef DGMRES<_MatrixType, _Preconditioner> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-} // end namespace internal
-
} // end namespace Eigen
#endif
diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
index 67498705b..cd15ce0bf 100644
--- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
@@ -281,6 +281,7 @@ private:
int m_restart;
public:
+ using Base::_solve_impl;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -315,25 +316,9 @@ public:
*/
void set_restart(const int restart) { m_restart=restart; }
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
- * \a x0 as an initial solution.
- *
- * \sa compute()
- */
- template<typename Rhs,typename Guess>
- inline const internal::solve_retval_with_guess<GMRES, Rhs, Guess>
- solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
- {
- eigen_assert(m_isInitialized && "GMRES is not initialized.");
- eigen_assert(Base::rows()==b.rows()
- && "GMRES::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval_with_guess
- <GMRES, Rhs, Guess>(*this, b.derived(), x0);
- }
-
/** \internal */
template<typename Rhs,typename Dest>
- void _solveWithGuess(const Rhs& b, Dest& x) const
+ void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
bool failed = false;
for(int j=0; j<b.cols(); ++j)
@@ -353,35 +338,17 @@ public:
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const
{
x = b;
if(x.squaredNorm() == 0) return; // Check Zero right hand side
- _solveWithGuess(b,x);
+ _solve_with_guess_impl(b,x.derived());
}
protected:
};
-
-namespace internal {
-
- template<typename _MatrixType, typename _Preconditioner, typename Rhs>
-struct solve_retval<GMRES<_MatrixType, _Preconditioner>, Rhs>
- : solve_retval_base<GMRES<_MatrixType, _Preconditioner>, Rhs>
-{
- typedef GMRES<_MatrixType, _Preconditioner> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_GMRES_H
diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
index 661c1f2e0..35cfa315d 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
@@ -27,8 +27,11 @@ namespace Eigen {
*/
template <typename Scalar, int _UpLo = Lower, typename _OrderingType = NaturalOrdering<int> >
-class IncompleteCholesky : internal::noncopyable
+class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_UpLo,_OrderingType> >
{
+ protected:
+ typedef SparseSolverBase<IncompleteCholesky<Scalar,_UpLo,_OrderingType> > Base;
+ using Base::m_isInitialized;
public:
typedef SparseMatrix<Scalar,ColMajor> MatrixType;
typedef _OrderingType OrderingType;
@@ -89,7 +92,7 @@ class IncompleteCholesky : internal::noncopyable
}
template<typename Rhs, typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, Dest& x) const
{
eigen_assert(m_factorizationIsOk && "factorize() should be called first");
if (m_perm.rows() == b.rows())
@@ -103,22 +106,13 @@ class IncompleteCholesky : internal::noncopyable
x = m_perm * x;
x = m_scal.asDiagonal() * x;
}
- template<typename Rhs> inline const internal::solve_retval<IncompleteCholesky, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_factorizationIsOk && "IncompleteLLT did not succeed");
- eigen_assert(m_isInitialized && "IncompleteLLT is not initialized.");
- eigen_assert(cols()==b.rows()
- && "IncompleteLLT::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<IncompleteCholesky, Rhs>(*this, b.derived());
- }
+
protected:
SparseMatrix<Scalar,ColMajor> m_L; // The lower part stored in CSC
ScalarType m_scal; // The vector for scaling the matrix
Scalar m_shift; //The initial shift parameter
bool m_analysisIsOk;
bool m_factorizationIsOk;
- bool m_isInitialized;
ComputationInfo m_info;
PermutationType m_perm;
@@ -132,7 +126,6 @@ template<typename _MatrixType>
void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat)
{
using std::sqrt;
- using std::min;
eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
// Dropping strategies : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. Other strategies will be added
@@ -166,7 +159,7 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
for (int j = 0; j < n; j++){
for (int k = colPtr[j]; k < colPtr[j+1]; k++)
vals[k] /= (m_scal(j) * m_scal(rowIdx[k]));
- mindiag = (min)(vals[colPtr[j]], mindiag);
+ mindiag = numext::mini(vals[colPtr[j]], mindiag);
}
if(mindiag < Scalar(0.)) m_shift = m_shift - mindiag;
@@ -256,22 +249,6 @@ inline void IncompleteCholesky<Scalar,_UpLo, OrderingType>::updateList(const Idx
listCol[rowIdx(jk)].push_back(col);
}
}
-namespace internal {
-
-template<typename _Scalar, int _UpLo, typename OrderingType, typename Rhs>
-struct solve_retval<IncompleteCholesky<_Scalar, _UpLo, OrderingType>, Rhs>
- : solve_retval_base<IncompleteCholesky<_Scalar, _UpLo, OrderingType>, Rhs>
-{
- typedef IncompleteCholesky<_Scalar, _UpLo, OrderingType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
-};
-
-} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h
index 67e780181..7d08c3515 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h
@@ -13,8 +13,12 @@
namespace Eigen {
template <typename _Scalar>
-class IncompleteLU
+class IncompleteLU : public SparseSolverBase<IncompleteLU<_Scalar> >
{
+ protected:
+ typedef SparseSolverBase<IncompleteLU<_Scalar> > Base;
+ using Base::m_isInitialized;
+
typedef _Scalar Scalar;
typedef Matrix<Scalar,Dynamic,1> Vector;
typedef typename Vector::Index Index;
@@ -23,10 +27,10 @@ class IncompleteLU
public:
typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType;
- IncompleteLU() : m_isInitialized(false) {}
+ IncompleteLU() {}
template<typename MatrixType>
- IncompleteLU(const MatrixType& mat) : m_isInitialized(false)
+ IncompleteLU(const MatrixType& mat)
{
compute(mat);
}
@@ -71,43 +75,16 @@ class IncompleteLU
}
template<typename Rhs, typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, Dest& x) const
{
x = m_lu.template triangularView<UnitLower>().solve(b);
x = m_lu.template triangularView<Upper>().solve(x);
}
- template<typename Rhs> inline const internal::solve_retval<IncompleteLU, Rhs>
- solve(const MatrixBase<Rhs>& b) const
- {
- eigen_assert(m_isInitialized && "IncompleteLU is not initialized.");
- eigen_assert(cols()==b.rows()
- && "IncompleteLU::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval<IncompleteLU, Rhs>(*this, b.derived());
- }
-
protected:
FactorType m_lu;
- bool m_isInitialized;
-};
-
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<IncompleteLU<_MatrixType>, Rhs>
- : solve_retval_base<IncompleteLU<_MatrixType>, Rhs>
-{
- typedef IncompleteLU<_MatrixType> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
};
-} // end namespace internal
-
} // end namespace Eigen
#endif // EIGEN_INCOMPLETE_LU_H
diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
index 98f9ecc17..aaf42c78a 100644
--- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
@@ -2,7 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -217,6 +217,7 @@ namespace Eigen {
using Base::m_info;
using Base::m_isInitialized;
public:
+ using Base::_solve_impl;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -244,26 +245,10 @@ namespace Eigen {
/** Destructor. */
~MINRES(){}
-
- /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
- * \a x0 as an initial solution.
- *
- * \sa compute()
- */
- template<typename Rhs,typename Guess>
- inline const internal::solve_retval_with_guess<MINRES, Rhs, Guess>
- solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
- {
- eigen_assert(m_isInitialized && "MINRES is not initialized.");
- eigen_assert(Base::rows()==b.rows()
- && "MINRES::solve(): invalid number of rows of the right hand side matrix b");
- return internal::solve_retval_with_guess
- <MINRES, Rhs, Guess>(*this, b.derived(), x0);
- }
-
+
/** \internal */
template<typename Rhs,typename Dest>
- void _solveWithGuess(const Rhs& b, Dest& x) const
+ void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
@@ -284,33 +269,16 @@ namespace Eigen {
/** \internal */
template<typename Rhs,typename Dest>
- void _solve(const Rhs& b, Dest& x) const
+ void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const
{
x.setZero();
- _solveWithGuess(b,x);
+ _solve_with_guess_impl(b,x.derived());
}
protected:
};
-
- namespace internal {
-
- template<typename _MatrixType, int _UpLo, typename _Preconditioner, typename Rhs>
- struct solve_retval<MINRES<_MatrixType,_UpLo,_Preconditioner>, Rhs>
- : solve_retval_base<MINRES<_MatrixType,_UpLo,_Preconditioner>, Rhs>
- {
- typedef MINRES<_MatrixType,_UpLo,_Preconditioner> Dec;
- EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
- template<typename Dest> void evalTo(Dest& dst) const
- {
- dec()._solve(rhs(),dst);
- }
- };
-
- } // end namespace internal
-
+
} // end namespace Eigen
#endif // EIGEN_MINRES_H
diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index b8f2cba17..446fcac16 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -48,8 +48,8 @@ class KroneckerProductBase : public ReturnByValue<Derived>
*/
Scalar coeff(Index row, Index col) const
{
- return m_A.coeff(row / m_B.rows(), col / m_B.cols()) *
- m_B.coeff(row % m_B.rows(), col % m_B.cols());
+ return m_A.coeff(typename Lhs::Index(row / m_B.rows()), typename Lhs::Index(col / m_B.cols())) *
+ m_B.coeff(typename Rhs::Index(row % m_B.rows()), typename Rhs::Index(col % m_B.cols()));
}
/*!
@@ -59,7 +59,7 @@ class KroneckerProductBase : public ReturnByValue<Derived>
Scalar coeff(Index i) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
- return m_A.coeff(i / m_A.size()) * m_B.coeff(i % m_A.size());
+ return m_A.coeff(typename Lhs::Index(i / m_A.size())) * m_B.coeff(typename Rhs::Index(i % m_A.size()));
}
protected:
@@ -148,38 +148,53 @@ template<typename Lhs, typename Rhs>
template<typename Dest>
void KroneckerProductSparse<Lhs,Rhs>::evalTo(Dest& dst) const
{
- typedef typename Base::Index Index;
- const Index Br = m_B.rows(),
- Bc = m_B.cols();
- dst.resize(this->rows(), this->cols());
+ typedef typename Dest::Index DestIndex;
+ const typename Rhs::Index Br = m_B.rows(),
+ Bc = m_B.cols();
+ eigen_assert(this->rows() <= NumTraits<DestIndex>::highest());
+ eigen_assert(this->cols() <= NumTraits<DestIndex>::highest());
+ dst.resize(DestIndex(this->rows()), DestIndex(this->cols()));
dst.resizeNonZeros(0);
+ // 1 - evaluate the operands if needed:
+ typedef typename internal::nested_eval<Lhs,Dynamic>::type Lhs1;
+ typedef typename internal::remove_all<Lhs1>::type Lhs1Cleaned;
+ const Lhs1 lhs1(m_A);
+ typedef typename internal::nested_eval<Rhs,Dynamic>::type Rhs1;
+ typedef typename internal::remove_all<Rhs1>::type Rhs1Cleaned;
+ const Rhs1 rhs1(m_B);
+
+ // 2 - construct respective iterators
+ typedef Eigen::InnerIterator<Lhs1Cleaned> LhsInnerIterator;
+ typedef Eigen::InnerIterator<Rhs1Cleaned> RhsInnerIterator;
+
// compute number of non-zeros per innervectors of dst
{
VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
- for (Index kA=0; kA < m_A.outerSize(); ++kA)
- for (typename Lhs::InnerIterator itA(m_A,kA); itA; ++itA)
+ for (typename Lhs::Index kA=0; kA < m_A.outerSize(); ++kA)
+ for (LhsInnerIterator itA(lhs1,kA); itA; ++itA)
nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++;
VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());
- for (Index kB=0; kB < m_B.outerSize(); ++kB)
- for (typename Rhs::InnerIterator itB(m_B,kB); itB; ++itB)
+ for (typename Rhs::Index kB=0; kB < m_B.outerSize(); ++kB)
+ for (RhsInnerIterator itB(rhs1,kB); itB; ++itB)
nnzB(Dest::IsRowMajor ? itB.row() : itB.col())++;
Matrix<int,Dynamic,Dynamic,ColMajor> nnzAB = nnzB * nnzA.transpose();
dst.reserve(VectorXi::Map(nnzAB.data(), nnzAB.size()));
}
- for (Index kA=0; kA < m_A.outerSize(); ++kA)
+ for (typename Lhs::Index kA=0; kA < m_A.outerSize(); ++kA)
{
- for (Index kB=0; kB < m_B.outerSize(); ++kB)
+ for (typename Rhs::Index kB=0; kB < m_B.outerSize(); ++kB)
{
- for (typename Lhs::InnerIterator itA(m_A,kA); itA; ++itA)
+ for (LhsInnerIterator itA(lhs1,kA); itA; ++itA)
{
- for (typename Rhs::InnerIterator itB(m_B,kB); itB; ++itB)
+ for (RhsInnerIterator itB(rhs1,kB); itB; ++itB)
{
- const Index i = itA.row() * Br + itB.row(),
- j = itA.col() * Bc + itB.col();
+ const DestIndex
+ i = DestIndex(itA.row() * Br + itB.row()),
+ j = DestIndex(itA.col() * Bc + itB.col());
dst.insert(i,j) = itA.value() * itB.value();
}
}
@@ -201,8 +216,7 @@ struct traits<KroneckerProduct<_Lhs,_Rhs> >
Rows = size_at_compile_time<traits<Lhs>::RowsAtCompileTime, traits<Rhs>::RowsAtCompileTime>::ret,
Cols = size_at_compile_time<traits<Lhs>::ColsAtCompileTime, traits<Rhs>::ColsAtCompileTime>::ret,
MaxRows = size_at_compile_time<traits<Lhs>::MaxRowsAtCompileTime, traits<Rhs>::MaxRowsAtCompileTime>::ret,
- MaxCols = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret,
- CoeffReadCost = Lhs::CoeffReadCost + Rhs::CoeffReadCost + NumTraits<Scalar>::MulCost
+ MaxCols = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret
};
typedef Matrix<Scalar,Rows,Cols> ReturnType;
@@ -215,7 +229,7 @@ struct traits<KroneckerProductSparse<_Lhs,_Rhs> >
typedef typename remove_all<_Lhs>::type Lhs;
typedef typename remove_all<_Rhs>::type Rhs;
typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
- typedef typename promote_storage_type<typename traits<Lhs>::StorageKind, typename traits<Rhs>::StorageKind>::ret StorageKind;
+ typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, typename traits<Rhs>::StorageKind, scalar_product_op<typename Lhs::Scalar, typename Rhs::Scalar> >::ret StorageKind;
typedef typename promote_index_type<typename Lhs::Index, typename Rhs::Index>::type Index;
enum {
@@ -235,7 +249,7 @@ struct traits<KroneckerProductSparse<_Lhs,_Rhs> >
CoeffReadCost = Dynamic
};
- typedef SparseMatrix<Scalar> ReturnType;
+ typedef SparseMatrix<Scalar, 0, Index> ReturnType;
};
} // end namespace internal
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
index f5290dee4..db3a0ef2c 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
@@ -19,18 +19,19 @@ namespace Eigen {
namespace internal {
-template <typename Scalar,int Rows, int Cols, typename Index>
+template <typename Scalar,int Rows, int Cols, typename PermIndex>
void lmqrsolv(
Matrix<Scalar,Rows,Cols> &s,
- const PermutationMatrix<Dynamic,Dynamic,Index> &iPerm,
+ const PermutationMatrix<Dynamic,Dynamic,PermIndex> &iPerm,
const Matrix<Scalar,Dynamic,1> &diag,
const Matrix<Scalar,Dynamic,1> &qtb,
Matrix<Scalar,Dynamic,1> &x,
Matrix<Scalar,Dynamic,1> &sdiag)
{
+ typedef typename Matrix<Scalar,Rows,Cols>::Index Index;
/* Local variables */
- Index i, j, k, l;
+ Index i, j, k;
Scalar temp;
Index n = s.cols();
Matrix<Scalar,Dynamic,1> wa(n);
@@ -52,7 +53,7 @@ void lmqrsolv(
/* prepare the row of d to be eliminated, locating the */
/* diagonal element using p from the qr factorization. */
- l = iPerm.indices()(j);
+ const PermIndex l = iPerm.indices()(j);
if (diag[l] == 0.)
break;
sdiag.tail(n-j).setZero();
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index 160120d03..9e0545660 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -392,14 +392,15 @@ template<typename Derived> struct MatrixExponentialReturnValue
template <typename ResultType>
inline void evalTo(ResultType& result) const
{
- internal::matrix_exp_compute(m_src, result);
+ const typename internal::nested_eval<Derived, 10>::type tmp(m_src);
+ internal::matrix_exp_compute(tmp, result);
}
Index rows() const { return m_src.rows(); }
Index cols() const { return m_src.cols(); }
protected:
- const typename internal::nested<Derived, 10>::type m_src;
+ const typename internal::nested<Derived>::type m_src;
};
namespace internal {
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index a35c11be5..b68aae5e8 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -485,7 +485,7 @@ template<typename Derived> class MatrixFunctionReturnValue
typedef typename internal::stem_function<Scalar>::type StemFunction;
protected:
- typedef typename internal::nested<Derived, 10>::type DerivedNested;
+ typedef typename internal::nested<Derived>::type DerivedNested;
public:
@@ -503,18 +503,19 @@ template<typename Derived> class MatrixFunctionReturnValue
template <typename ResultType>
inline void evalTo(ResultType& result) const
{
- typedef typename internal::remove_all<DerivedNested>::type DerivedNestedClean;
- typedef internal::traits<DerivedNestedClean> Traits;
+ typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
+ typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean;
+ typedef internal::traits<NestedEvalTypeClean> Traits;
static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
- static const int Options = DerivedNestedClean::Options;
+ static const int Options = NestedEvalTypeClean::Options;
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef Matrix<ComplexScalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType;
AtomicType atomic(m_f);
- internal::matrix_function_compute<DerivedNestedClean>::run(m_A, atomic, result);
+ internal::matrix_function_compute<NestedEvalTypeClean>::run(m_A, atomic, result);
}
Index rows() const { return m_A.rows(); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index d46ccc145..22bfdc4ac 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -53,15 +53,20 @@ void matrix_log_compute_2x2(const MatrixType& A, MatrixType& result)
result(1,0) = Scalar(0);
result(1,1) = logA11;
- if (A(0,0) == A(1,1)) {
+ Scalar y = A(1,1) - A(0,0);
+ if (y==Scalar(0))
+ {
result(0,1) = A(0,1) / A(0,0);
- } else if ((abs(A(0,0)) < 0.5*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1)))) {
- result(0,1) = A(0,1) * (logA11 - logA00) / (A(1,1) - A(0,0));
- } else {
+ }
+ else if ((abs(A(0,0)) < 0.5*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1))))
+ {
+ result(0,1) = A(0,1) * (logA11 - logA00) / y;
+ }
+ else
+ {
// computation in previous branch is inaccurate if A(1,1) \approx A(0,0)
int unwindingNumber = static_cast<int>(ceil((imag(logA11 - logA00) - M_PI) / (2*M_PI)));
- Scalar y = A(1,1) - A(0,0), x = A(1,1) + A(0,0);
- result(0,1) = A(0,1) * (Scalar(2) * numext::atanh2(y,x) + Scalar(0,2*M_PI*unwindingNumber)) / y;
+ result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,2*M_PI*unwindingNumber)) / y;
}
}
@@ -310,7 +315,7 @@ public:
typedef typename Derived::Index Index;
protected:
- typedef typename internal::nested<Derived, 10>::type DerivedNested;
+ typedef typename internal::nested<Derived>::type DerivedNested;
public:
@@ -327,17 +332,18 @@ public:
template <typename ResultType>
inline void evalTo(ResultType& result) const
{
- typedef typename internal::remove_all<DerivedNested>::type DerivedNestedClean;
- typedef internal::traits<DerivedNestedClean> Traits;
+ typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+ typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+ typedef internal::traits<DerivedEvalTypeClean> Traits;
static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
- static const int Options = DerivedNestedClean::Options;
+ static const int Options = DerivedEvalTypeClean::Options;
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef Matrix<ComplexScalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
AtomicType atomic;
- internal::matrix_function_compute<DerivedNestedClean>::run(m_A, atomic, result);
+ internal::matrix_function_compute<DerivedEvalTypeClean>::run(m_A, atomic, result);
}
Index rows() const { return m_A.rows(); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
index ee665c18e..1e5a59c55 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -299,7 +299,7 @@ MatrixPowerAtomic<MatrixType>::computeSuperDiag(const ComplexScalar& curr, const
ComplexScalar logCurr = log(curr);
ComplexScalar logPrev = log(prev);
int unwindingNumber = ceil((numext::imag(logCurr - logPrev) - M_PI) / (2*M_PI));
- ComplexScalar w = numext::atanh2(curr - prev, curr + prev) + ComplexScalar(0, M_PI*unwindingNumber);
+ ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, M_PI*unwindingNumber);
return RealScalar(2) * exp(RealScalar(0.5) * p * (logCurr + logPrev)) * sinh(p * w) / (curr - prev);
}
@@ -311,7 +311,7 @@ MatrixPowerAtomic<MatrixType>::computeSuperDiag(RealScalar curr, RealScalar prev
using std::log;
using std::sinh;
- RealScalar w = numext::atanh2(curr - prev, curr + prev);
+ RealScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2);
return 2 * exp(p * (log(curr) + log(prev)) / 2) * sinh(p * w) / (curr - prev);
}
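
The replacement relies on a small algebraic identity: the removed numext::atanh2(y, x) computed atanh(y/x) = 0.5*log((x+y)/(x-y)), and with y = curr-prev, x = curr+prev this collapses to 0.5*log(curr/prev) = 0.5*log1p((curr-prev)/prev). A standalone check of the equivalence (atanh2_legacy is a local stand-in for the removed helper, not Eigen API):

    #include <cassert>
    #include <cmath>

    // Stand-in for the removed numext::atanh2(y, x) = atanh(y/x).
    double atanh2_legacy(double y, double x) {
      return 0.5 * std::log((x + y) / (x - y));
    }

    int main() {
      double curr = 2.000001, prev = 2.0;
      double old_w = atanh2_legacy(curr - prev, curr + prev);
      double new_w = 0.5 * std::log1p((curr - prev) / prev);
      assert(std::abs(old_w - new_w) < 1e-12);
      return 0;
    }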
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
index 8ca4f4864..3a4d6eb3f 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
@@ -320,7 +320,7 @@ template<typename Derived> class MatrixSquareRootReturnValue
{
protected:
typedef typename Derived::Index Index;
- typedef typename internal::nested<Derived, 10>::type DerivedNested;
+ typedef typename internal::nested<Derived>::type DerivedNested;
public:
/** \brief Constructor.
@@ -338,8 +338,10 @@ template<typename Derived> class MatrixSquareRootReturnValue
template <typename ResultType>
inline void evalTo(ResultType& result) const
{
- typedef typename internal::remove_all<DerivedNested>::type DerivedNestedClean;
- internal::matrix_sqrt_compute<DerivedNestedClean>::run(m_src, result);
+ typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+ typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+ DerivedEvalType tmp(m_src);
+ internal::matrix_sqrt_compute<DerivedEvalTypeClean>::run(tmp, result);
}
Index rows() const { return m_src.rows(); }
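
The evalTo above now materializes the nested expression into a concrete temporary (tmp) before running the algorithm, instead of instantiating matrix_sqrt_compute on the raw expression type. In user-level terms the pattern is simply this (the expression below is illustrative):

    #include <Eigen/Dense>
    using namespace Eigen;

    int main() {
      MatrixXd A = MatrixXd::Random(4, 4);
      // Evaluate the lazy expression once into plain storage before
      // handing it to an algorithm that reads coefficients repeatedly.
      MatrixXd tmp = A + A.transpose();
      // ... run the coefficient-heavy computation on tmp, not on the expression ...
      return 0;
    }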
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialUtils.h b/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
index 2bb8bc84a..40ba65b7e 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
@@ -56,7 +56,7 @@ T poly_eval( const Polynomials& poly, const T& x )
for( DenseIndex i=1; i<poly.size(); ++i ){
val = val*inv_x + poly[i]; }
- return std::pow(x,(T)(poly.size()-1)) * val;
+ return numext::pow(x,(T)(poly.size()-1)) * val;
}
}
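
For context, poly_eval stores coefficients by increasing degree and, roughly, runs classic Horner for |x| <= 1 but Horner on 1/x rescaled by x^(n-1) otherwise; the hunk only swaps std::pow for numext::pow so the call dispatches through Eigen's own math layer. A standalone sketch of that evaluation strategy (not Eigen's exact code):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // poly[i] is the coefficient of x^i.
    double poly_eval_sketch(const std::vector<double>& poly, double x) {
      if (std::abs(x) <= 1.0) {                  // classic Horner
        double val = 0.0;
        for (std::size_t i = poly.size(); i-- > 0; )
          val = val * x + poly[i];
        return val;
      }
      double inv_x = 1.0 / x;                    // Horner on 1/x ...
      double val = poly[0];
      for (std::size_t i = 1; i < poly.size(); ++i)
        val = val * inv_x + poly[i];
      return std::pow(x, double(poly.size() - 1)) * val;  // ... rescaled
    }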
diff --git a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
index dec16df28..976f9f270 100644
--- a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
+++ b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
@@ -331,6 +331,7 @@ class DynamicSparseMatrix<Scalar,_Options,_Index>::InnerIterator : public Sparse
inline Index row() const { return IsRowMajor ? m_outer : Base::index(); }
inline Index col() const { return IsRowMajor ? Base::index() : m_outer; }
+ inline Index outer() const { return m_outer; }
protected:
const Index m_outer;
@@ -347,11 +348,42 @@ class DynamicSparseMatrix<Scalar,_Options,_Index>::ReverseInnerIterator : public
inline Index row() const { return IsRowMajor ? m_outer : Base::index(); }
inline Index col() const { return IsRowMajor ? Base::index() : m_outer; }
+ inline Index outer() const { return m_outer; }
protected:
const Index m_outer;
};
+namespace internal {
+
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_Index> >
+ : evaluator_base<DynamicSparseMatrix<_Scalar,_Options,_Index> >
+{
+ typedef _Scalar Scalar;
+ typedef _Index Index;
+ typedef DynamicSparseMatrix<_Scalar,_Options,_Index> SparseMatrixType;
+ typedef typename SparseMatrixType::InnerIterator InnerIterator;
+ typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
+
+ enum {
+ CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+ Flags = SparseMatrixType::Flags
+ };
+
+ evaluator() : m_matrix(0) {}
+ evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {}
+
+ operator SparseMatrixType&() { return m_matrix->const_cast_derived(); }
+ operator const SparseMatrixType&() const { return *m_matrix; }
+
+ Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); }
+
+ const SparseMatrixType *m_matrix;
+};
+
+}
+
} // end namespace Eigen
#endif // EIGEN_DYNAMIC_SPARSEMATRIX_H
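
The new evaluator just forwards iteration and coefficient access to the wrapped matrix, which keeps DynamicSparseMatrix usable with the evaluator-based expression machinery. A usage sketch of the iterator API, including the newly added outer() accessor:

    #include <unsupported/Eigen/SparseExtra>
    using namespace Eigen;

    int main() {
      DynamicSparseMatrix<double> m(4, 4);
      m.coeffRef(1, 2) = 3.0;
      for (int k = 0; k < m.outerSize(); ++k)
        for (DynamicSparseMatrix<double>::InnerIterator it(m, k); it; ++it) {
          // it.row(), it.col(), it.value() as before; it.outer()
          // now exposes the outer index (equal to k here) directly.
          double v = it.value();
          (void)v;
        }
      return 0;
    }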
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 9f44e47f9..7b6751f00 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -5,6 +5,7 @@ add_custom_target(BuildUnsupported)
include_directories(../../test ../../unsupported ../../Eigen
${CMAKE_CURRENT_BINARY_DIR}/../../test)
+
find_package(GoogleHash)
if(GOOGLEHASH_FOUND)
add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
@@ -40,6 +41,7 @@ ei_add_test(matrix_function)
ei_add_test(matrix_power)
ei_add_test(matrix_square_root)
ei_add_test(alignedvector3)
+
ei_add_test(FFT)
find_package(MPFR 2.3.0)
@@ -74,8 +76,9 @@ if(NOT EIGEN_TEST_NO_OPENGL)
find_package(GLUT)
find_package(GLEW)
if(OPENGL_FOUND AND GLUT_FOUND AND GLEW_FOUND)
+ include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR} ${GLEW_INCLUDE_DIRS})
ei_add_property(EIGEN_TESTED_BACKENDS "OpenGL, ")
- set(EIGEN_GL_LIB ${GLUT_LIBRARIES} ${GLEW_LIBRARIES})
+ set(EIGEN_GL_LIB ${GLUT_LIBRARIES} ${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
ei_add_test(openglsupport "" "${EIGEN_GL_LIB}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "OpenGL, ")
@@ -86,12 +89,11 @@ endif()
ei_add_test(polynomialsolver)
ei_add_test(polynomialutils)
-ei_add_test(kronecker_product)
ei_add_test(splines)
ei_add_test(gmres)
ei_add_test(minres)
ei_add_test(levenberg_marquardt)
-ei_add_test(bdcsvd)
+ei_add_test(kronecker_product)
option(EIGEN_TEST_CXX11 "Enable testing of C++11 features (e.g. Tensor module)." ON)
if(EIGEN_TEST_CXX11)
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index 75974f84f..724ea7b5b 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -246,9 +246,9 @@ struct hybrj_functor : Functor<double>
int operator()(const VectorXd &x, VectorXd &fvec)
{
double temp, temp1, temp2;
- const int n = x.size();
+ const VectorXd::Index n = x.size();
assert(fvec.size()==n);
- for (int k = 0; k < n; k++)
+ for (VectorXd::Index k = 0; k < n; k++)
{
temp = (3. - 2.*x[k])*x[k];
temp1 = 0.;
@@ -261,12 +261,12 @@ struct hybrj_functor : Functor<double>
}
int df(const VectorXd &x, MatrixXd &fjac)
{
- const int n = x.size();
+ const VectorXd::Index n = x.size();
assert(fjac.rows()==n);
assert(fjac.cols()==n);
- for (int k = 0; k < n; k++)
+ for (VectorXd::Index k = 0; k < n; k++)
{
- for (int j = 0; j < n; j++)
+ for (VectorXd::Index j = 0; j < n; j++)
fjac(k,j) = 0.;
fjac(k,k) = 3.- 4.*x[k];
if (k) fjac(k,k-1) = -1.;
@@ -351,10 +351,10 @@ struct hybrd_functor : Functor<double>
int operator()(const VectorXd &x, VectorXd &fvec) const
{
double temp, temp1, temp2;
- const int n = x.size();
+ const VectorXd::Index n = x.size();
assert(fvec.size()==n);
- for (int k=0; k < n; k++)
+ for (VectorXd::Index k=0; k < n; k++)
{
temp = (3. - 2.*x[k])*x[k];
temp1 = 0.;
@@ -455,7 +455,7 @@ struct lmstr_functor : Functor<double>
assert(jac_row.size()==x.size());
double tmp1, tmp2, tmp3, tmp4;
- int i = rownb-2;
+ VectorXd::Index i = rownb-2;
tmp1 = i+1;
tmp2 = 16 - i - 1;
tmp3 = (i>=8)? tmp2 : tmp1;
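
The counter type change matters because x.size() returns VectorXd::Index (a signed pointer-width type, std::ptrdiff_t by default), so int counters mix integer widths in both the comparison and the indexing. The same pattern in isolation (sum_abs is a hypothetical helper, not part of the test):

    #include <cmath>
    #include <Eigen/Dense>
    using namespace Eigen;

    double sum_abs(const VectorXd& x) {
      double s = 0.0;
      // Use the vector's own Index type for the loop counter so it
      // matches the type returned by x.size().
      for (VectorXd::Index k = 0; k < x.size(); ++k)
        s += std::abs(x[k]);
      return s;
    }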
diff --git a/unsupported/test/bdcsvd.cpp b/unsupported/test/bdcsvd.cpp
deleted file mode 100644
index 115a649b0..000000000
--- a/unsupported/test/bdcsvd.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/
-
-#include "svd_common.h"
-#include <iostream>
-#include <Eigen/LU>
-
-// check if "svd" is the good image of "m"
-template<typename MatrixType>
-void bdcsvd_check_full(const MatrixType& m, const BDCSVD<MatrixType>& svd)
-{
- svd_check_full< MatrixType, BDCSVD< MatrixType > >(m, svd);
-}
-
-// Compare to a reference value
-template<typename MatrixType>
-void bdcsvd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const BDCSVD<MatrixType>& referenceSvd)
-{
- svd_compare_to_full< MatrixType, BDCSVD< MatrixType > >(m, computationOptions, referenceSvd);
-} // end bdcsvd_compare_to_full
-
-
-template<typename MatrixType>
-void bdcsvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- svd_solve< MatrixType, BDCSVD< MatrixType > >(m, computationOptions);
-} // end template bdcsvd_solve
-
-
-// test the computation options
-template<typename MatrixType>
-void bdcsvd_test_all_computation_options(const MatrixType& m)
-{
- BDCSVD<MatrixType> fullSvd(m, ComputeFullU|ComputeFullV);
- svd_test_computation_options_1< MatrixType, BDCSVD< MatrixType > >(m, fullSvd);
- svd_test_computation_options_2< MatrixType, BDCSVD< MatrixType > >(m, fullSvd);
-} // end bdcsvd_test_all_computation_options
-
-
-// Call a test with all the computations options
-template<typename MatrixType>
-void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
-{
- MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a;
- bdcsvd_test_all_computation_options<MatrixType>(m);
-} // end template bdcsvd
-
-
-// verify assert
-template<typename MatrixType>
-void bdcsvd_verify_assert(const MatrixType& m)
-{
- svd_verify_assert< MatrixType, BDCSVD< MatrixType > >(m);
-}// end template bdcsvd_verify_assert
-
-
-// test weird values
-template<typename MatrixType>
-void bdcsvd_inf_nan()
-{
- svd_inf_nan< MatrixType, BDCSVD< MatrixType > >();
-}// end template bdcsvd_inf_nan
-
-
-
-void bdcsvd_preallocate()
-{
- svd_preallocate< BDCSVD< MatrixXf > >();
-} // end bdcsvd_preallocate
-
-
-// compare the singular values returned by JacobiSVD and BDCSVD
-template<typename MatrixType>
-void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0)
-{
- std::cout << "debut compare" << std::endl;
- MatrixType m = MatrixType::Random(a.rows(), a.cols());
- BDCSVD<MatrixType> bdc_svd(m);
- JacobiSVD<MatrixType> jacobi_svd(m);
- VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues());
- if(computationOptions & ComputeFullU)
- VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
- if(computationOptions & ComputeThinU)
- VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
- if(computationOptions & ComputeFullV)
- VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
- if(computationOptions & ComputeThinV)
- VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
- std::cout << "fin compare" << std::endl;
-} // end template compare_bdc_jacobi
-
-
-// call the tests
-void test_bdcsvd()
-{
- // test on a dynamic-size 42x42 matrix of float
- CALL_SUBTEST_11(( bdcsvd_verify_assert<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(42,42)) ));
- CALL_SUBTEST_11(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(42,42), 0) ));
- CALL_SUBTEST_11(( bdcsvd<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(42,42)) ));
-
- // test on a dynamic-size 50x50 matrix of double
- CALL_SUBTEST_13(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(50,50)) ));
- CALL_SUBTEST_13(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(50,50), 0) ));
- CALL_SUBTEST_13(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(50, 50)) ));
-
- // test on a dynamic-size 22x22 matrix of complex double
- CALL_SUBTEST_14(( bdcsvd_verify_assert<Matrix<std::complex<double>,Dynamic,Dynamic> >
- (Matrix<std::complex<double>,Dynamic,Dynamic>(22,22)) ));
- CALL_SUBTEST_14(( compare_bdc_jacobi<Matrix<std::complex<double>,Dynamic,Dynamic> >
- (Matrix<std::complex<double>, Dynamic, Dynamic> (22,22), 0) ));
- CALL_SUBTEST_14(( bdcsvd<Matrix<std::complex<double>,Dynamic,Dynamic> >
- (Matrix<std::complex<double>,Dynamic,Dynamic>(22, 22)) ));
-
- // test on a dynamic-size 10x10 matrix of int
- //CALL_SUBTEST_15(( bdcsvd_verify_assert<Matrix<int,Dynamic,Dynamic> >
- // (Matrix<int,Dynamic,Dynamic>(10,10)) ));
- //CALL_SUBTEST_15(( compare_bdc_jacobi<Matrix<int,Dynamic,Dynamic> >
- // (Matrix<int,Dynamic,Dynamic>(10,10), 0) ));
- //CALL_SUBTEST_15(( bdcsvd<Matrix<int,Dynamic,Dynamic> >
- // (Matrix<int,Dynamic,Dynamic>(10, 10)) ));
-
-
- // test on a dynamic-size 8x6 matrix of double
-
- CALL_SUBTEST_16(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(8,6)) ));
- CALL_SUBTEST_16(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(8, 6), 0) ));
- CALL_SUBTEST_16(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(8, 6)) ));
-
-
-
- // test on a dynamic-size 36x12 matrix of float
- CALL_SUBTEST_17(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(36, 12), 0) ));
- CALL_SUBTEST_17(( bdcsvd<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(36, 12)) ));
-
- // test on a dynamic-size 5x8 matrix of double
- CALL_SUBTEST_18(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(5, 8), 0) ));
- CALL_SUBTEST_18(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(5, 8)) ));
-
-
- // non-regression tests
- CALL_SUBTEST_3(( bdcsvd_verify_assert(Matrix3f()) ));
- CALL_SUBTEST_4(( bdcsvd_verify_assert(Matrix4d()) ));
- CALL_SUBTEST_7(( bdcsvd_verify_assert(MatrixXf(10,12)) ));
- CALL_SUBTEST_8(( bdcsvd_verify_assert(MatrixXcd(7,5)) ));
-
- // SUBTESTS 1 and 2 on specific matrices
- for(int i = 0; i < g_repeat; i++) {
- Matrix2cd m;
- m << 0, 1,
- 0, 1;
- CALL_SUBTEST_1(( bdcsvd(m, false) ));
- m << 1, 0,
- 1, 0;
- CALL_SUBTEST_1(( bdcsvd(m, false) ));
-
- Matrix2d n;
- n << 0, 0,
- 0, 0;
- CALL_SUBTEST_2(( bdcsvd(n, false) ));
- n << 0, 0,
- 0, 1;
- CALL_SUBTEST_2(( bdcsvd(n, false) ));
-
- // Static-size matrices don't work with BDCSVD yet
- // bdc algo on a random 3x3 float matrix
- // CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
- // bdc algo on a random 4x4 double matrix
- // CALL_SUBTEST_4(( bdcsvd<Matrix4d>() ));
- // bdc algo on a random 3x5 float matrix
- // CALL_SUBTEST_5(( bdcsvd<Matrix<float,3,5> >() ));
-
- int r = internal::random<int>(1, 30),
- c = internal::random<int>(1, 30);
- CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(r,c)) ));
- CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(r,c)) ));
- (void) r;
- (void) c;
-
- // Test on inf/nan matrix
- CALL_SUBTEST_7( bdcsvd_inf_nan<MatrixXf>() );
- }
-
- CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
- CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) ));
-
- // Test problem size constructors
- CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) );
-
-} // end test_bdcsvd
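
The core of the deleted suite was the BDCSVD-vs-JacobiSVD cross-check in compare_bdc_jacobi; condensed to its essence (a sketch, assuming BDCSVD is still reachable under unsupported/ as in the includes above):

    #include <unsupported/Eigen/BDCSVD>
    #include <Eigen/SVD>
    using namespace Eigen;

    int main() {
      MatrixXf m = MatrixXf::Random(42, 42);
      BDCSVD<MatrixXf> bdc(m);       // singular values only by default
      JacobiSVD<MatrixXf> jac(m);
      // The two algorithms must agree on the singular values.
      return bdc.singularValues().isApprox(jac.singularValues()) ? 0 : 1;
    }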
diff --git a/unsupported/test/jacobisvd.cpp b/unsupported/test/jacobisvd.cpp
deleted file mode 100644
index b4e884eee..000000000
--- a/unsupported/test/jacobisvd.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include "svd_common.h"
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPreconditioner>& svd)
-{
- svd_check_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner > >(m, svd);
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const JacobiSVD<MatrixType, QRPreconditioner>& referenceSvd)
-{
- svd_compare_to_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner> >(m, computationOptions, referenceSvd);
-}
-
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- svd_solve< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, computationOptions);
-}
-
-
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_test_all_computation_options(const MatrixType& m)
-{
-
- if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
- return;
-
- JacobiSVD< MatrixType, QRPreconditioner > fullSvd(m, ComputeFullU|ComputeFullV);
- svd_test_computation_options_1< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd);
-
- if(QRPreconditioner == FullPivHouseholderQRPreconditioner)
- return;
- svd_test_computation_options_2< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd);
-
-}
-
-template<typename MatrixType>
-void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
-{
- MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a;
-
- jacobisvd_test_all_computation_options<MatrixType, FullPivHouseholderQRPreconditioner>(m);
- jacobisvd_test_all_computation_options<MatrixType, ColPivHouseholderQRPreconditioner>(m);
- jacobisvd_test_all_computation_options<MatrixType, HouseholderQRPreconditioner>(m);
- jacobisvd_test_all_computation_options<MatrixType, NoQRPreconditioner>(m);
-}
-
-
-template<typename MatrixType>
-void jacobisvd_verify_assert(const MatrixType& m)
-{
-
- svd_verify_assert<MatrixType, JacobiSVD< MatrixType > >(m);
-
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- MatrixType a = MatrixType::Zero(rows, cols);
- a.setZero();
-
- if (ColsAtCompileTime == Dynamic)
- {
- JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> svd_fullqr;
- VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV))
- VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV))
- VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV))
- }
-}
-
-template<typename MatrixType>
-void jacobisvd_method()
-{
- enum { Size = MatrixType::RowsAtCompileTime };
- typedef typename MatrixType::RealScalar RealScalar;
- typedef Matrix<RealScalar, Size, 1> RealVecType;
- MatrixType m = MatrixType::Identity();
- VERIFY_IS_APPROX(m.jacobiSvd().singularValues(), RealVecType::Ones());
- VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixU());
- VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixV());
- VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m);
-}
-
-
-
-template<typename MatrixType>
-void jacobisvd_inf_nan()
-{
- svd_inf_nan<MatrixType, JacobiSVD< MatrixType > >();
-}
-
-
-// Regression test for bug 286: JacobiSVD loops indefinitely with some
-// matrices containing denormal numbers.
-void jacobisvd_bug286()
-{
-#if defined __INTEL_COMPILER
-// shut up warning #239: floating point underflow
-#pragma warning push
-#pragma warning disable 239
-#endif
- Matrix2d M;
- M << -7.90884e-313, -4.94e-324,
- 0, 5.60844e-313;
-#if defined __INTEL_COMPILER
-#pragma warning pop
-#endif
- JacobiSVD<Matrix2d> svd;
- svd.compute(M); // just check we don't loop indefinitely
-}
-
-
-void jacobisvd_preallocate()
-{
- svd_preallocate< JacobiSVD <MatrixXf> >();
-}
-
-void test_jacobisvd()
-{
- CALL_SUBTEST_11(( jacobisvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(16, 6)) ));
-
- CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) ));
- CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) ));
- CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) ));
- CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) ));
-
- for(int i = 0; i < g_repeat; i++) {
- Matrix2cd m;
- m << 0, 1,
- 0, 1;
- CALL_SUBTEST_1(( jacobisvd(m, false) ));
- m << 1, 0,
- 1, 0;
- CALL_SUBTEST_1(( jacobisvd(m, false) ));
-
- Matrix2d n;
- n << 0, 0,
- 0, 0;
- CALL_SUBTEST_2(( jacobisvd(n, false) ));
- n << 0, 0,
- 0, 1;
- CALL_SUBTEST_2(( jacobisvd(n, false) ));
-
- CALL_SUBTEST_3(( jacobisvd<Matrix3f>() ));
- CALL_SUBTEST_4(( jacobisvd<Matrix4d>() ));
- CALL_SUBTEST_5(( jacobisvd<Matrix<float,3,5> >() ));
- CALL_SUBTEST_6(( jacobisvd<Matrix<double,Dynamic,2> >(Matrix<double,Dynamic,2>(10,2)) ));
-
- int r = internal::random<int>(1, 30),
- c = internal::random<int>(1, 30);
- CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(r,c)) ));
- CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(r,c)) ));
- (void) r;
- (void) c;
-
- // Test on inf/nan matrix
- CALL_SUBTEST_7( jacobisvd_inf_nan<MatrixXf>() );
- }
-
- CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
- CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) ));
-
-
- // test matrixbase method
- CALL_SUBTEST_1(( jacobisvd_method<Matrix2cd>() ));
- CALL_SUBTEST_3(( jacobisvd_method<Matrix3f>() ));
-
-
- // Test problem size constructors
- CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
-
- // Check that preallocation avoids subsequent mallocs
- CALL_SUBTEST_9( jacobisvd_preallocate() );
-
- // Regression check for bug 286
- CALL_SUBTEST_2( jacobisvd_bug286() );
-}
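
The QRPreconditioner template parameter the deleted test swept over selects how a rectangular input is reduced before the Jacobi iterations; for example:

    #include <Eigen/SVD>
    using namespace Eigen;

    int main() {
      MatrixXf m = MatrixXf::Random(10, 4);
      // The second template argument picks the preconditioner; the
      // default is ColPivHouseholderQRPreconditioner. Note that the
      // full-pivoting variant supports only full unitaries, which is
      // why the test skipped the thin-U/V options for it.
      JacobiSVD<MatrixXf, FullPivHouseholderQRPreconditioner>
          svd(m, ComputeFullU | ComputeFullV);
      return svd.singularValues().size() == 4 ? 0 : 1;
    }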
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 753a2d417..02411a262 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -216,5 +216,17 @@ void test_kronecker_product()
sC2 = kroneckerProduct(sA,sB);
dC = kroneckerProduct(dA,dB);
VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+
+ sC2 = kroneckerProduct(dA,sB);
+ dC = kroneckerProduct(dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+
+ sC2 = kroneckerProduct(sA,dB);
+ dC = kroneckerProduct(dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+
+ sC2 = kroneckerProduct(2*sA,sB);
+ dC = kroneckerProduct(2*dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
}
}
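
The new checks cover mixed dense/sparse operands and a scaled left factor. A standalone usage sketch of the same API:

    #include <unsupported/Eigen/KroneckerProduct>
    #include <Eigen/Sparse>
    using namespace Eigen;

    int main() {
      MatrixXf dA = MatrixXf::Random(2, 2), dB = MatrixXf::Random(3, 3);
      SparseMatrix<float> sB = dB.sparseView();
      SparseMatrix<float> sC = kroneckerProduct(dA, sB);  // mixed operands
      MatrixXf dC = kroneckerProduct(dA, dB);             // dense reference
      return MatrixXf(sC).isApprox(dC) ? 0 : 1;
    }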
diff --git a/unsupported/test/svd_common.h b/unsupported/test/svd_common.h
deleted file mode 100644
index 6a96e7c74..000000000
--- a/unsupported/test/svd_common.h
+++ /dev/null
@@ -1,261 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-// discard stack allocation as that too bypasses malloc
-#define EIGEN_STACK_ALLOCATION_LIMIT 0
-#define EIGEN_RUNTIME_NO_MALLOC
-
-#include "main.h"
-#include <unsupported/Eigen/BDCSVD>
-#include <Eigen/LU>
-
-
-// check if "svd" is the good image of "m"
-template<typename MatrixType, typename SVD>
-void svd_check_full(const MatrixType& m, const SVD& svd)
-{
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef typename MatrixType::Scalar Scalar;
- typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
- typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
-
-
- MatrixType sigma = MatrixType::Zero(rows, cols);
- sigma.diagonal() = svd.singularValues().template cast<Scalar>();
- MatrixUType u = svd.matrixU();
- MatrixVType v = svd.matrixV();
- VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
- VERIFY_IS_UNITARY(u);
- VERIFY_IS_UNITARY(v);
-} // end svd_check_full
-
-
-
-// Compare to a reference value
-template<typename MatrixType, typename SVD>
-void svd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const SVD& referenceSvd)
-{
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
- Index diagSize = (std::min)(rows, cols);
-
- SVD svd(m, computationOptions);
-
- VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
- if(computationOptions & ComputeFullU)
- VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
- if(computationOptions & ComputeThinU)
- VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
- if(computationOptions & ComputeFullV)
- VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
- if(computationOptions & ComputeThinV)
- VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
-} // end svd_compare_to_full
-
-
-
-template<typename MatrixType, typename SVD>
-void svd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
- typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
-
- RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
- SVD svd(m, computationOptions);
- SolutionType x = svd.solve(rhs);
- // evaluate normal equation which works also for least-squares solutions
- VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs);
-} // end svd_solve
-
-
-// test computation options
-// split into two functions because JacobiSVD can return before the second one runs
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_1(const MatrixType& m, const SVD& fullSvd)
-{
- svd_check_full< MatrixType, SVD >(m, fullSvd);
- svd_solve< MatrixType, SVD >(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_2(const MatrixType& m, const SVD& fullSvd)
-{
- svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeFullV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, 0, fullSvd);
-
- if (MatrixType::ColsAtCompileTime == Dynamic) {
- // thin U/V are only available with dynamic number of columns
-
- svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU|ComputeThinV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeFullV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU , fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeThinV, fullSvd);
- svd_solve<MatrixType, SVD>(m, ComputeFullU | ComputeThinV);
- svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeFullV);
- svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeThinV);
-
- typedef typename MatrixType::Index Index;
- Index diagSize = (std::min)(m.rows(), m.cols());
- SVD svd(m, ComputeThinU | ComputeThinV);
- VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
- }
-}
-
-template<typename MatrixType, typename SVD>
-void svd_verify_assert(const MatrixType& m)
-{
- typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
- RhsType rhs(rows);
- SVD svd;
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.singularValues())
- VERIFY_RAISES_ASSERT(svd.matrixV())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
- MatrixType a = MatrixType::Zero(rows, cols);
- a.setZero();
- svd.compute(a, 0);
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.matrixV())
- svd.singularValues();
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
- if (ColsAtCompileTime == Dynamic)
- {
- svd.compute(a, ComputeThinU);
- svd.matrixU();
- VERIFY_RAISES_ASSERT(svd.matrixV())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
- svd.compute(a, ComputeThinV);
- svd.matrixV();
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
- }
- else
- {
- VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
- VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
- }
-}
-
-// work around stupid msvc error when constructing at compile time an expression that involves
-// a division by zero, even if the numeric type is floating point
-template<typename Scalar>
-EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
-
-// workaround aggressive optimization in ICC
-template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
-
-
-template<typename MatrixType, typename SVD>
-void svd_inf_nan()
-{
- // all this function does is verify we don't iterate infinitely on nan/inf values
-
- SVD svd;
- typedef typename MatrixType::Scalar Scalar;
- Scalar some_inf = Scalar(1) / zero<Scalar>();
- VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
- svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
-
- Scalar some_nan = zero<Scalar> () / zero<Scalar> ();
- VERIFY(some_nan != some_nan);
- svd.compute(MatrixType::Constant(10,10,some_nan), ComputeFullU | ComputeFullV);
-
- MatrixType m = MatrixType::Zero(10,10);
- m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
- svd.compute(m, ComputeFullU | ComputeFullV);
-
- m = MatrixType::Zero(10,10);
- m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_nan;
- svd.compute(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename SVD>
-void svd_preallocate()
-{
- Vector3f v(3.f, 2.f, 1.f);
- MatrixXf m = v.asDiagonal();
-
- internal::set_is_malloc_allowed(false);
- VERIFY_RAISES_ASSERT(VectorXf v(10);)
- SVD svd;
- internal::set_is_malloc_allowed(true);
- svd.compute(m);
- VERIFY_IS_APPROX(svd.singularValues(), v);
-
- SVD svd2(3,3);
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
- VERIFY_IS_APPROX(svd2.singularValues(), v);
- VERIFY_RAISES_ASSERT(svd2.matrixU());
- VERIFY_RAISES_ASSERT(svd2.matrixV());
- svd2.compute(m, ComputeFullU | ComputeFullV);
- VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
- VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
-
- SVD svd3(3,3,ComputeFullU|ComputeFullV);
- internal::set_is_malloc_allowed(false);
- svd3.compute(m);
- internal::set_is_malloc_allowed(true);
- VERIFY_IS_APPROX(svd3.singularValues(), v);
- VERIFY_IS_APPROX(svd3.matrixU(), Matrix3f::Identity());
- VERIFY_IS_APPROX(svd3.matrixV(), Matrix3f::Identity());
- internal::set_is_malloc_allowed(false);
- svd3.compute(m, ComputeFullU|ComputeFullV);
- internal::set_is_malloc_allowed(true);
-}
-
-
-
-
-
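
The solve() validation that svd_solve performed above generalizes to least-squares problems: rather than demanding m*x == rhs, it checked the normal equations. In essence (a sketch using JacobiSVD, which shares the solve API):

    #include <Eigen/SVD>
    using namespace Eigen;

    int main() {
      MatrixXd m = MatrixXd::Random(8, 5);    // over-determined system
      VectorXd rhs = VectorXd::Random(8);
      JacobiSVD<MatrixXd> svd(m, ComputeThinU | ComputeThinV);
      VectorXd x = svd.solve(rhs);
      // m*x need not equal rhs, but the least-squares solution must
      // satisfy the normal equations m^H m x = m^H rhs up to roundoff.
      bool ok = (m.adjoint() * m * x).isApprox(m.adjoint() * rhs, 1e-9);
      return ok ? 0 : 1;
    }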