diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2021-02-26 09:12:44 -0800 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2021-02-26 09:12:44 -0800 |
commit | e67672024d82ad4fb173d647842e0a8ad2448fa1 (patch) | |
tree | b31c01fb64adaf7d3bd8ea0bafe51338b05ab689 | |
parent | 5e7d4c33d637866ba4c8eef46e4826ace1bf6402 (diff) | |
parent | 51eba8c3e2567adbce53dfc9e62bddf294220fcc (diff) |
Merge branch 'nan_prop' of https://gitlab.com/rmlarsen1/eigen into nan_prop
-rw-r--r-- | Eigen/src/Core/DenseBase.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MatrixProduct.h | 17 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h | 2 | ||||
-rw-r--r-- | test/gpu_basic.cu | 4 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 12 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_morphing.cpp | 60 |
6 files changed, 87 insertions, 11 deletions
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index f28c47877..20cc4821a 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -454,6 +454,7 @@ template<typename Derived> class DenseBase template<int NaNPropagation> EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const; + // By default, the fastest version with undefined NaN propagation semantics is // used. // TODO(rmlarsen): Replace with default template argument when we move to @@ -465,8 +466,6 @@ template<typename Derived> class DenseBase return maxCoeff<PropagateFast>(); } - - template<typename IndexType> EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const; template<typename IndexType> EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h index 89d81e06f..03d474a70 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h @@ -12,12 +12,21 @@ #include "MatrixProductCommon.h" -#if __GNUC__ > 10 || __clang_major__ > 11 || \ - (__GNUC__ == 10 && (__GNUC_MINOR__ > 2 || \ - (__GNUC_MINOR__ == 2 && \ - __GNUC_PATCHLEVEL__ >= 1))) +#if EIGEN_COMP_LLVM +#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY) +#ifdef __MMA__ +#define EIGEN_ALTIVEC_MMA_ONLY +#else +#define EIGEN_ALTIVEC_DISABLE_MMA +#endif +#endif +#endif + +#ifdef __has_builtin +#if __has_builtin(__builtin_mma_assemble_acc) #define ALTIVEC_MMA_SUPPORT #endif +#endif #if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) #include "MatrixProductMMA.h" diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h index bfee9ee92..37db1a6f1 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h @@ -12,9 +12,11 @@ #pragma GCC target("cpu=power10") +#ifdef __has_builtin #if !__has_builtin(__builtin_vsx_assemble_pair) #define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair #endif +#endif namespace Eigen { diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu index 1935f0bc6..b2e657e72 100644 --- a/test/gpu_basic.cu +++ b/test/gpu_basic.cu @@ -409,6 +409,9 @@ EIGEN_DECLARE_TEST(gpu_basic) // (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) ); CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) ); + + // HIP does not support new/delete on device. + CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) ); #endif CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) ); @@ -438,5 +441,4 @@ EIGEN_DECLARE_TEST(gpu_basic) typedef Matrix<float,6,6> Matrix6f; CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix6f>(), nthreads, in, out) ); #endif - CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) ); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index ceecd54d0..ef79c8567 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -451,6 +451,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi } m_is_identity = true; + bool degenerate = false; for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) { eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); @@ -458,6 +459,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi op.startIndices()[i] != 0) { m_is_identity = false; } + if (op.sizes()[i] == 0) { // we have an empty size + degenerate = true; + } } // No strides for scalars. @@ -475,8 +479,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi m_outputStrides[0] = 1; for (int i = 1; i < NumDims; ++i) { m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); } } else { m_inputStrides[NumDims-1] = 1; for (int i = NumDims - 2; i >= 0; --i) { @@ -487,8 +491,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi m_outputStrides[NumDims-1] = 1; for (int i = NumDims - 2; i >= 0; --i) { m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); } } } diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp index e8c42a4cd..ed5d5ade3 100644 --- a/unsupported/test/cxx11_tensor_morphing.cpp +++ b/unsupported/test/cxx11_tensor_morphing.cpp @@ -479,6 +479,66 @@ static void test_composition() } } +template<typename T, int DataLayout> +static void test_empty_slice() +{ + Tensor<T, 3, DataLayout> tensor(2,3,5); + tensor.setRandom(); + Tensor<T, 3, DataLayout> copy = tensor; + + // empty size in first dimension + Eigen::DSizes<ptrdiff_t, 3> indices1(1,2,3); + Eigen::DSizes<ptrdiff_t, 3> sizes1(0,1,2); + Tensor<T, 3, DataLayout> slice1(0,1,2); + slice1.setRandom(); + tensor.slice(indices1, sizes1) = slice1; + + // empty size in second dimension + Eigen::DSizes<ptrdiff_t, 3> indices2(1,2,3); + Eigen::DSizes<ptrdiff_t, 3> sizes2(1,0,2); + Tensor<T, 3, DataLayout> slice2(1,0,2); + slice2.setRandom(); + tensor.slice(indices2, sizes2) = slice2; + + // empty size in third dimension + Eigen::DSizes<ptrdiff_t, 3> indices3(1,2,3); + Eigen::DSizes<ptrdiff_t, 3> sizes3(1,1,0); + Tensor<T, 3, DataLayout> slice3(1,1,0); + slice3.setRandom(); + tensor.slice(indices3, sizes3) = slice3; + + // empty size in first and second dimension + Eigen::DSizes<ptrdiff_t, 3> indices4(1,2,3); + Eigen::DSizes<ptrdiff_t, 3> sizes4(0,0,2); + Tensor<T, 3, DataLayout> slice4(0,0,2); + slice4.setRandom(); + tensor.slice(indices4, sizes4) = slice4; + + // empty size in second and third dimension + Eigen::DSizes<ptrdiff_t, 3> indices5(1,2,3); + Eigen::DSizes<ptrdiff_t, 3> sizes5(1,0,0); + Tensor<T, 3, DataLayout> slice5(1,0,0); + slice5.setRandom(); + tensor.slice(indices5, sizes5) = slice5; + + // empty size in all dimensions + Eigen::DSizes<ptrdiff_t, 3> indices6(1,2,3); + Eigen::DSizes<ptrdiff_t, 3> sizes6(0,0,0); + Tensor<T, 3, DataLayout> slice6(0,0,0); + slice6.setRandom(); + tensor.slice(indices6, sizes6) = slice6; + + // none of these operations should change the tensor's components + // because all of the rvalue slices have at least one zero dimension + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + VERIFY_IS_EQUAL(tensor(i,j,k), copy(i,j,k)); + } + } + } +} + #define CALL_SUBTEST_PART(PART) \ CALL_SUBTEST_##PART |