author    Rasmus Munk Larsen <rmlarsen@google.com>  2021-02-26 09:12:44 -0800
committer Rasmus Munk Larsen <rmlarsen@google.com>  2021-02-26 09:12:44 -0800
commit    e67672024d82ad4fb173d647842e0a8ad2448fa1 (patch)
tree      b31c01fb64adaf7d3bd8ea0bafe51338b05ab689
parent    5e7d4c33d637866ba4c8eef46e4826ace1bf6402 (diff)
parent    51eba8c3e2567adbce53dfc9e62bddf294220fcc (diff)
Merge branch 'nan_prop' of https://gitlab.com/rmlarsen1/eigen into nan_prop
-rw-r--r--  Eigen/src/Core/DenseBase.h                           |  3
-rw-r--r--  Eigen/src/Core/arch/AltiVec/MatrixProduct.h          | 17
-rw-r--r--  Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h       |  2
-rw-r--r--  test/gpu_basic.cu                                    |  4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h  | 12
-rw-r--r--  unsupported/test/cxx11_tensor_morphing.cpp           | 60
6 files changed, 87 insertions(+), 11 deletions(-)
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index f28c47877..20cc4821a 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -454,6 +454,7 @@ template<typename Derived> class DenseBase
template<int NaNPropagation>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+
// By default, the fastest version with undefined NaN propagation semantics is
// used.
// TODO(rmlarsen): Replace with default template argument when we move to
@@ -465,8 +466,6 @@ template<typename Derived> class DenseBase
return maxCoeff<PropagateFast>();
}
-
-
template<typename IndexType> EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
template<typename IndexType> EIGEN_DEVICE_FUNC
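
Note on the hunk above: the templated maxCoeff<NaNPropagation>() overload is the one whose comment explains that the plain maxCoeff() defaults to the fastest variant with unspecified NaN semantics. A caller-side sketch of selecting the policy explicitly (assuming the PropagateFast / PropagateNaN / PropagateNumbers constants from the nan_prop work are available; not part of this patch):

// Caller-side sketch: choosing NaN propagation semantics for reductions.
#include <Eigen/Dense>
#include <iostream>
#include <limits>

int main() {
  Eigen::Array3f a;
  a << 1.0f, std::numeric_limits<float>::quiet_NaN(), 3.0f;

  std::cout << a.maxCoeff() << "\n";                           // fastest path, NaN behaviour unspecified
  std::cout << a.maxCoeff<Eigen::PropagateNaN>() << "\n";      // propagates the NaN -> nan
  std::cout << a.maxCoeff<Eigen::PropagateNumbers>() << "\n";  // ignores the NaN -> 3
}
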
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
index 89d81e06f..03d474a70 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
@@ -12,12 +12,21 @@
#include "MatrixProductCommon.h"
-#if __GNUC__ > 10 || __clang_major__ > 11 || \
- (__GNUC__ == 10 && (__GNUC_MINOR__ > 2 || \
- (__GNUC_MINOR__ == 2 && \
- __GNUC_PATCHLEVEL__ >= 1)))
+#if EIGEN_COMP_LLVM
+#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY)
+#ifdef __MMA__
+#define EIGEN_ALTIVEC_MMA_ONLY
+#else
+#define EIGEN_ALTIVEC_DISABLE_MMA
+#endif
+#endif
+#endif
+
+#ifdef __has_builtin
+#if __has_builtin(__builtin_mma_assemble_acc)
#define ALTIVEC_MMA_SUPPORT
#endif
+#endif
#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
#include "MatrixProductMMA.h"
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
index bfee9ee92..37db1a6f1 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
@@ -12,9 +12,11 @@
#pragma GCC target("cpu=power10")
+#ifdef __has_builtin
#if !__has_builtin(__builtin_vsx_assemble_pair)
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
#endif
+#endif
namespace Eigen {
diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu
index 1935f0bc6..b2e657e72 100644
--- a/test/gpu_basic.cu
+++ b/test/gpu_basic.cu
@@ -409,6 +409,9 @@ EIGEN_DECLARE_TEST(gpu_basic)
// (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) );
+
+ // HIP does not support new/delete on device.
+ CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) );
#endif
CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) );
@@ -438,5 +441,4 @@ EIGEN_DECLARE_TEST(gpu_basic)
typedef Matrix<float,6,6> Matrix6f;
CALL_SUBTEST( run_and_compare_to_gpu(eigenvalues<Matrix6f>(), nthreads, in, out) );
#endif
- CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) );
}
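
Note on the move above: the alloc_new_delete subtest now sits inside the block that is skipped for HIP, since HIP does not support operator new/delete in device code. A hedged sketch of the kind of device-side allocation such a subtest exercises (functor name and body here are illustrative; the real implementation is outside this hunk):

// Illustrative functor in the style of the gpu_basic.cu kernels.
template <typename Vec>
struct alloc_new_delete_sketch {
  EIGEN_DEVICE_FUNC void operator()(int i, const typename Vec::Scalar* in,
                                    typename Vec::Scalar* out) const {
    Vec* v = new Vec(in[i], in[i + 1], in[i + 2]);  // device-side operator new
    out[i] = v->norm();
    delete v;                                       // device-side operator delete
  }
};
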
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index ceecd54d0..ef79c8567 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -451,6 +451,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
m_is_identity = true;
+ bool degenerate = false;
for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
eigen_assert(m_impl.dimensions()[i] >=
op.sizes()[i] + op.startIndices()[i]);
@@ -458,6 +459,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
op.startIndices()[i] != 0) {
m_is_identity = false;
}
+ if (op.sizes()[i] == 0) { // we have an empty size
+ degenerate = true;
+ }
}
// No strides for scalars.
@@ -475,8 +479,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
m_outputStrides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
- }
+ // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
+ m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); }
} else {
m_inputStrides[NumDims-1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
@@ -487,8 +491,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
m_outputStrides[NumDims-1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
- }
+ // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
+ m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); }
}
}
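
Note on the change above: when any requested slice size is zero the output is degenerate, its strides are zero, and internal::TensorIntDivisor cannot be constructed from a zero divisor; the patch therefore substitutes 1, which is safe because a degenerate slice has no coefficients and the divisor is never consumed. A minimal sketch of the guard pattern in isolation (names illustrative):

// Sketch: a fast-divisor helper that requires a strictly positive divisor,
// and a caller that substitutes 1 for degenerate (zero-sized) shapes.
#include <cassert>
#include <cstddef>

struct FastDivisor {
  explicit FastDivisor(std::ptrdiff_t d) : divisor(d) { assert(d > 0); }
  std::ptrdiff_t divisor;
};

inline FastDivisor make_output_divisor(std::ptrdiff_t stride, bool degenerate) {
  // For a zero-sized output the stride is 0; pass 1 so construction succeeds.
  return FastDivisor(degenerate ? 1 : stride);
}
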
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index e8c42a4cd..ed5d5ade3 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -479,6 +479,66 @@ static void test_composition()
}
}
+template<typename T, int DataLayout>
+static void test_empty_slice()
+{
+ Tensor<T, 3, DataLayout> tensor(2,3,5);
+ tensor.setRandom();
+ Tensor<T, 3, DataLayout> copy = tensor;
+
+ // empty size in first dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices1(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes1(0,1,2);
+ Tensor<T, 3, DataLayout> slice1(0,1,2);
+ slice1.setRandom();
+ tensor.slice(indices1, sizes1) = slice1;
+
+ // empty size in second dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices2(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes2(1,0,2);
+ Tensor<T, 3, DataLayout> slice2(1,0,2);
+ slice2.setRandom();
+ tensor.slice(indices2, sizes2) = slice2;
+
+ // empty size in third dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices3(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes3(1,1,0);
+ Tensor<T, 3, DataLayout> slice3(1,1,0);
+ slice3.setRandom();
+ tensor.slice(indices3, sizes3) = slice3;
+
+ // empty size in first and second dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices4(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes4(0,0,2);
+ Tensor<T, 3, DataLayout> slice4(0,0,2);
+ slice4.setRandom();
+ tensor.slice(indices4, sizes4) = slice4;
+
+ // empty size in second and third dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices5(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes5(1,0,0);
+ Tensor<T, 3, DataLayout> slice5(1,0,0);
+ slice5.setRandom();
+ tensor.slice(indices5, sizes5) = slice5;
+
+ // empty size in all dimensions
+ Eigen::DSizes<ptrdiff_t, 3> indices6(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes6(0,0,0);
+ Tensor<T, 3, DataLayout> slice6(0,0,0);
+ slice6.setRandom();
+ tensor.slice(indices6, sizes6) = slice6;
+
+ // none of these operations should change the tensor's components
+ // because all of the rvalue slices have at least one zero dimension
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ VERIFY_IS_EQUAL(tensor(i,j,k), copy(i,j,k));
+ }
+ }
+ }
+}
+
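
Note on the new test above: it walks through every combination of zero-sized dimensions and verifies that assigning through an empty slice neither crashes nor changes the destination. A minimal standalone repro in the same spirit (assuming the Eigen source root is on the include path):

// Standalone version of the pattern the test exercises: with this patch,
// assigning through a zero-sized slice is a no-op instead of crashing in
// the TensorIntDivisor construction.
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 5);
  t.setRandom();

  Eigen::DSizes<ptrdiff_t, 3> offsets(1, 2, 3);
  Eigen::DSizes<ptrdiff_t, 3> extents(0, 1, 2);  // first dimension is empty
  Eigen::Tensor<float, 3> src(0, 1, 2);

  t.slice(offsets, extents) = src;               // evaluates zero coefficients
  return 0;
}
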
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART