From 3122477c8660f4e66e9cf4bf24e4fdfd6d56378c Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Tue, 12 Dec 2017 11:15:24 -0800 Subject: Update the padding computation for PADDING_SAME to be consistent with TensorFlow. --- unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 3c6a2e091..91d4ead28 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -265,6 +265,10 @@ struct TensorEvaluator, Device> // Calculate the padding m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; + // The padding size calculation for PADDING_SAME has been updated to + // be consistent with how TensorFlow extracts its paddings. + m_rowPaddingTop = numext::maxi(0, m_rowPaddingTop); + m_colPaddingLeft = numext::maxi(0, m_colPaddingLeft); break; default: eigen_assert(false && "unexpected padding"); -- cgit v1.2.3 From 59985cfd26416fb6b196af868c187e90d237c352 Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Sun, 31 Dec 2017 10:44:56 -0500 Subject: Disable use of recurrence for computing twiddle factors. Fixes FFT precision issues for large FFTs. https://github.com/tensorflow/tensorflow/issues/10749#issuecomment-354557689 --- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 40 +++++++++++++++++--------- unsupported/test/cxx11_tensor_fft.cpp | 28 ++++++++++++++++++ 2 files changed, 54 insertions(+), 14 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 10e0a8a6b..f81da318c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -231,20 +231,32 @@ struct TensorEvaluator, D // t_n = exp(sqrt(-1) * pi * n^2 / line_len) // for n = 0, 1,..., line_len-1. // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 - pos_j_base_powered[0] = ComplexScalar(1, 0); - if (line_len > 1) { - const RealScalar pi_over_len(EIGEN_PI / line_len); - const ComplexScalar pos_j_base = ComplexScalar( - std::cos(pi_over_len), std::sin(pi_over_len)); - pos_j_base_powered[1] = pos_j_base; - if (line_len > 2) { - const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; - for (int j = 2; j < line_len + 1; ++j) { - pos_j_base_powered[j] = pos_j_base_powered[j - 1] * - pos_j_base_powered[j - 1] / - pos_j_base_powered[j - 2] * pos_j_base_sq; - } - } + + // The recurrence is correct in exact arithmetic, but causes + // numerical issues for large transforms, especially in + // single-precision floating point. + // + // pos_j_base_powered[0] = ComplexScalar(1, 0); + // if (line_len > 1) { + // const ComplexScalar pos_j_base = ComplexScalar( + // numext::cos(M_PI / line_len), numext::sin(M_PI / line_len)); + // pos_j_base_powered[1] = pos_j_base; + // if (line_len > 2) { + // const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; + // for (int i = 2; i < line_len + 1; ++i) { + // pos_j_base_powered[i] = pos_j_base_powered[i - 1] * + // pos_j_base_powered[i - 1] / + // pos_j_base_powered[i - 2] * + // pos_j_base_sq; + // } + // } + // } + // TODO(rmlarsen): Find a way to use Eigen's vectorized sin + // and cosine functions here. + for (int j = 0; j < line_len + 1; ++j) { + double arg = ((EIGEN_PI * j) * j) / line_len; + std::complex tmp(numext::cos(arg), numext::sin(arg)); + pos_j_base_powered[j] = static_cast(tmp); } } diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp index 2f14ebc62..a55369477 100644 --- a/unsupported/test/cxx11_tensor_fft.cpp +++ b/unsupported/test/cxx11_tensor_fft.cpp @@ -224,6 +224,32 @@ static void test_fft_real_input_energy() { } } +template +static void test_fft_non_power_of_2_round_trip(int exponent) { + int n = (1 << exponent) + 1; + + Eigen::DSizes dimensions; + dimensions[0] = n; + const DSizes arr = dimensions; + Tensor input; + + input.resize(arr); + input.setRandom(); + + array fft; + fft[0] = 0; + + Tensor, 1, ColMajor> forward = + input.template fft(fft); + + Tensor output = + forward.template fft(fft); + + for (int i = 0; i < n; ++i) { + VERIFY_IS_APPROX(input[i], output[i]); + } +} + void test_cxx11_tensor_fft() { test_fft_complex_input_golden(); test_fft_real_input_golden(); @@ -270,4 +296,6 @@ void test_cxx11_tensor_fft() { test_fft_real_input_energy(); test_fft_real_input_energy(); test_fft_real_input_energy(); + + test_fft_non_power_of_2_round_trip(7); } -- cgit v1.2.3 From 1eff6cf8a77f1b8699671d31f8f307a6fd9170ea Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Tue, 20 Feb 2018 16:50:05 -0800 Subject: Use device's allocate function instead of internal::aligned_malloc. This would make it easier to track memory usage in device instances. --- unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index d30cc96ab..6fb69910e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -377,7 +377,7 @@ struct TensorEvaluator(bm_ * bk_ * sizeof(LhsScalar), align) * align; size_t rhs_size = divup(bn_ * bk_ * sizeof(RhsScalar), align) * align; - packed_mem_ = static_cast(internal::aligned_malloc( + packed_mem_ = static_cast(device_.allocate( (nm0_ * lhs_size + nn0_ * rhs_size) * std::min(nk_, P - 1))); char* mem = static_cast(packed_mem_); for (Index x = 0; x < numext::mini(nk_, P - 1); x++) { @@ -399,7 +399,7 @@ struct TensorEvaluator Date: Fri, 27 Jul 2018 12:36:34 -0700 Subject: Reduce the number of template specializations of classes related to tensor contraction to reduce binary size. --- .../Eigen/CXX11/src/Tensor/TensorContraction.h | 105 ++++++++++-------- .../CXX11/src/Tensor/TensorContractionBlocking.h | 7 +- .../CXX11/src/Tensor/TensorContractionThreadPool.h | 118 +++++++++------------ 3 files changed, 109 insertions(+), 121 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 0e69cd40c..57b5339d1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -177,9 +177,9 @@ struct NoOpOutputKernel { */ template EIGEN_ALWAYS_INLINE void operator()( - const OutputKernel::OutputMapper& output_mapper, - const TensorContractionParams& params, Index i, Index j, Index num_rows, - Index num_cols) const {} + const OutputKernel::OutputMapper& /*output_mapper*/, + const TensorContractionParams& /*params*/, Index /*i*/, + Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {} }; template @@ -467,42 +467,58 @@ struct TensorContractionEvaluatorBase } } - EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { - if (this->m_lhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - static_cast(this)->template evalProduct(buffer); - } - else { - static_cast(this)->template evalProduct(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - static_cast(this)->template evalProduct(buffer); - } - else { - static_cast(this)->template evalProduct(buffer); - } - } +#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \ + if (this->m_lhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHODARGS; \ + } \ + else { \ + METHODARGS; \ + } \ + } \ + else { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHODARGS; \ + } \ + else { \ + METHODARGS; \ + } \ + } \ + } \ + else { \ + if (this->m_rhs_inner_dim_contiguous) { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHODARGS; \ + } \ + else { \ + METHODARGS; \ + } \ + } \ + else { \ + if (this->m_rhs_inner_dim_reordered) { \ + METHODARGS; \ + } \ + else { \ + METHODARGS; \ + } \ + } \ } - else { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - static_cast(this)->template evalProduct(buffer); - } - else { - static_cast(this)->template evalProduct(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - static_cast(this)->template evalProduct(buffer); - } - else { - static_cast(this)->template evalProduct(buffer); - } - } + + EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { + static_cast(this)->template evalProduct(buffer); + } + + template + void evalProductSequential(Scalar* buffer) const { + if (this->m_j_size == 1) { + this->template evalGemv(buffer); + } else { + this->template evalGemm(buffer); } } @@ -623,7 +639,7 @@ struct TensorContractionEvaluatorBase OutputMapper output(buffer, m); // Sizes of the blocks to load in cache. See the Goto paper for details. - internal::TensorContractionBlocking blocking(k, m, n, 1); + internal::TensorContractionBlocking blocking(k, m, n, 1); const Index kc = blocking.kc(); const Index mc = numext::mini(m, blocking.mc()); const Index nc = numext::mini(n, blocking.nc()); @@ -976,14 +992,9 @@ struct TensorEvaluator - EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { - if (this->m_j_size == 1) { - this->template evalGemv(buffer); - return; - } - - this->template evalGemm(buffer); + template + void evalProduct(Scalar* buffer) const { + TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, Alignment, (buffer)); } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h index 8c1af1da8..cf281192c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -21,13 +21,10 @@ enum { // Default Blocking Strategy -template +template class TensorContractionBlocking { public: - typedef typename LhsMapper::Scalar LhsScalar; - typedef typename RhsMapper::Scalar RhsScalar; - /* adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h` requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h` @@ -41,7 +38,7 @@ class TensorContractionBlocking { ../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901: dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function */ - + #if !defined(EIGEN_HIPCC) EIGEN_DEVICE_FUNC #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 8b86d7aaf..182c5f7f9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -71,8 +71,7 @@ struct TensorEvaluator + template void evalProduct(Scalar* buffer) const { const Index m = this->m_i_size; const Index n = this->m_j_size; @@ -96,39 +95,6 @@ struct TensorEvaluator::type - LhsScalar; - typedef - typename internal::remove_const::type - RhsScalar; - typedef typename internal::gebp_traits Traits; - typedef TensorEvaluator LeftEvaluator; - typedef TensorEvaluator RightEvaluator; - typedef internal::TensorContractionInputMapper< - LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, - contract_t, internal::packet_traits::size, - lhs_inner_dim_contiguous, false, Unaligned> - LhsMapper; - typedef internal::TensorContractionInputMapper< - RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, - contract_t, internal::packet_traits::size, - rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> - RhsMapper; - typedef internal::blas_data_mapper OutputMapper; - typedef internal::gemm_pack_lhs - LhsPacker; - typedef internal::gemm_pack_rhs< - RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> - RhsPacker; - typedef internal::gebp_kernel - GebpKernel; - - - // Compute a set of algorithm parameters: // - kernel block sizes (bm, bn, bk) // - task grain sizes (number of kernels executed per task: gm, gn) @@ -158,14 +124,14 @@ struct TensorEvaluator blocking(k, m, n, 2); bm = blocking.mc(); bn = blocking.nc(); bk = blocking.kc(); } else { - internal::TensorContractionBlocking blocking(k, m, n, 2); bm = blocking.mc(); @@ -187,29 +153,22 @@ struct TensorEvaluatortemplate evalGemv(buffer); - else - this->template evalGemm(buffer); + TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, + Unaligned, (buffer)); return; } // Now that we know number of threads, recalculate sharding and blocking. shard_by_col = shardByCol(m, n, num_threads); if (shard_by_col) { - internal::TensorContractionBlocking blocking(k, m, n, num_threads); bm = blocking.mc(); bn = blocking.nc(); bk = blocking.kc(); } else { - internal::TensorContractionBlocking blocking(k, m, n, num_threads); bm = blocking.mc(); @@ -257,34 +216,55 @@ struct TensorEvaluatorm_leftImpl, this->m_left_nocontract_strides, - this->m_i_strides, this->m_left_contracting_strides, - this->m_k_strides); + #define CONTEXT_ARGS \ + (this, num_threads, buffer, m, n, k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, \ + nn0, shard_by_col, parallel_pack) \ + .run() + + TENSOR_CONTRACTION_DISPATCH(Context, Alignment, CONTEXT_ARGS); - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, - this->m_j_strides, this->m_right_contracting_strides, - this->m_k_strides); +#undef CONTEXT_ARGS - Context(this, num_threads, lhs, rhs, buffer, m, n, - k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0, - shard_by_col, parallel_pack) - .run(); } // Context coordinates a single parallel gemm operation. - template + template class Context { public: - Context(const Self* self, int num_threads, LhsMapper& lhs, - RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm, - Index bn, Index bk, Index nm, Index nn, Index nk, Index gm, - Index gn, Index nm0, Index nn0, bool shard_by_col, + typedef internal::TensorContractionInputMapper< + LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, + contract_t, internal::packet_traits::size, + lhs_inner_dim_contiguous, false, Unaligned> + LhsMapper; + typedef internal::TensorContractionInputMapper< + RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, + contract_t, internal::packet_traits::size, + rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> + RhsMapper; + typedef internal::gemm_pack_lhs + LhsPacker; + typedef internal::gemm_pack_rhs< + RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> + RhsPacker; + typedef internal::blas_data_mapper OutputMapper; + typedef internal::gebp_kernel + GebpKernel; + + Context(const Self* self, int num_threads, Scalar* buffer, Index tm, Index tn, + Index tk, Index bm, Index bn, Index bk, Index nm, Index nn, Index nk, + Index gm, Index gn, Index nm0, Index nn0, bool shard_by_col, bool parallel_pack) : device_(self->m_device), - lhs_(lhs), - rhs_(rhs), + lhs_(self->m_leftImpl, self->m_left_nocontract_strides, + self->m_i_strides, self->m_left_contracting_strides, + self->m_k_strides), + rhs_(self->m_rightImpl, self->m_right_nocontract_strides, + self->m_j_strides, self->m_right_contracting_strides, + self->m_k_strides), buffer_(buffer), output_(buffer, tm), output_kernel_(self->m_output_kernel), @@ -376,8 +356,8 @@ struct TensorEvaluator Date: Tue, 31 Jul 2018 10:10:14 +0200 Subject: Speedup trivial tensor broadcasting on GPU by enforcing unaligned loads. See PR 437. --- unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 278689915..e647b3609 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -284,7 +284,13 @@ struct TensorEvaluator, Device> if (static_cast(Layout) == static_cast(ColMajor)) { if (isCopy) { + #ifdef EIGEN_GPU_COMPILE_PHASE + // See PR 437: on NVIDIA P100 and K20m we observed a x3-4 speed up by enforcing + // unaligned loads here. The reason is unclear though. + return m_impl.template packet(index); + #else return m_impl.template packet(index); + #endif } else if (oneByN && !nByOne) { return packetNByOne(index); } else if (!oneByN && nByOne) { @@ -296,7 +302,12 @@ struct TensorEvaluator, Device> } } else { if (isCopy) { + #ifdef EIGEN_GPU_COMPILE_PHASE + // See above. + return m_impl.template packet(index); + #else return m_impl.template packet(index); + #endif } else if (oneByN && !nByOne) { return packetOneByN(index); } else if (!oneByN && nByOne) { -- cgit v1.2.3 From 9e219bb3d3f0f3a3157dcf8c2a27895e9f85035b Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 10:47:49 +0100 Subject: Converting ad-hoc inline keyword to EIGEN_STRONG_INLINE MACRO. --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 14 ++-- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 14 ++-- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 8 +-- unsupported/Eigen/CXX11/src/util/CXX11Meta.h | 74 +++++++++++----------- .../SpecialFunctions/SpecialFunctionsArrayAPI.h | 18 +++--- .../SpecialFunctions/SpecialFunctionsFunctors.h | 32 +++++----- 8 files changed, 83 insertions(+), 83 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index e3f6e37f0..aed71b265 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -112,7 +112,7 @@ class Tensor : public TensorBase - EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 5ca47cca7..4f973a5b7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -41,7 +41,7 @@ template struct fixed_size_tensor_index_linearization_helper { template EIGEN_DEVICE_FUNC - static inline Index run(array const& indices, + static EIGEN_STRONG_INLINE Index run(array const& indices, const Dimensions& dimensions) { return array_get(indices) + @@ -54,7 +54,7 @@ template struct fixed_size_tensor_index_linearization_helper { template EIGEN_DEVICE_FUNC - static inline Index run(array const&, const Dimensions&) + static EIGEN_STRONG_INLINE Index run(array const&, const Dimensions&) { return 0; } @@ -64,7 +64,7 @@ template struct fixed_size_tensor_index_extraction_helper { template EIGEN_DEVICE_FUNC - static inline Index run(const Index index, + static EIGEN_STRONG_INLINE Index run(const Index index, const Dimensions& dimensions) { const Index mult = (index == n-1) ? 1 : 0; @@ -77,7 +77,7 @@ template struct fixed_size_tensor_index_extraction_helper { template EIGEN_DEVICE_FUNC - static inline Index run(const Index, + static EIGEN_STRONG_INLINE Index run(const Index, const Dimensions&) { return 0; @@ -421,20 +421,20 @@ template struct sizes_match_below_dim { - static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { return false; } }; template struct sizes_match_below_dim { - static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) { return (array_get(dims1) == array_get(dims2)) & sizes_match_below_dim::run(dims1, dims2); } }; template struct sizes_match_below_dim { - static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { return true; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index ac5afd891..17008917a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -39,7 +39,7 @@ class TensorExecutor { using StorageIndex = typename Expression::Index; EIGEN_DEVICE_FUNC - static inline void run(const Expression& expr, + static EIGEN_STRONG_INLINE void run(const Expression& expr, const Device& device = Device()) { TensorEvaluator evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); @@ -63,7 +63,7 @@ class TensorExecutor evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); @@ -111,7 +111,7 @@ class TensorExecutor::NumDimensions; EIGEN_DEVICE_FUNC - static inline void run(const Expression& expr, + static EIGEN_STRONG_INLINE void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) { using TensorBlock = TensorBlock; @@ -223,7 +223,7 @@ class TensorExecutor { public: using StorageIndex = typename Expression::Index; - static inline void run(const Expression& expr, + static EIGEN_STRONG_INLINE void run(const Expression& expr, const ThreadPoolDevice& device) { typedef TensorEvaluator Evaluator; typedef EvalRange EvalRange; @@ -257,7 +257,7 @@ class TensorExecutor::NumDimensions; - static inline void run(const Expression& expr, + static EIGEN_STRONG_INLINE void run(const Expression& expr, const ThreadPoolDevice& device) { using TensorBlock = TensorBlock; @@ -376,7 +376,7 @@ EigenMetaKernel(Evaluator eval, StorageIndex size) { /*static*/ template -inline void TensorExecutor::run( +EIGEN_STRONG_INLINE void TensorExecutor::run( const Expression& expr, const GpuDevice& device) { TensorEvaluator evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); @@ -405,7 +405,7 @@ inline void TensorExecutor::run( template class TensorExecutor { public: - static inline void run(const Expression &expr, const SyclDevice &device) { + static EIGEN_STRONG_INLINE void run(const Expression &expr, const SyclDevice &device) { // call TensorSYCL module TensorSycl::run(expr, device); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3d0e4035a..7ecd4d1ac 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -20,7 +20,7 @@ namespace internal { template struct scalar_mod_op { EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a % m_divisor; } const Scalar m_divisor; }; template @@ -34,7 +34,7 @@ struct functor_traits > template struct scalar_mod2_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op) - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } }; template struct functor_traits > diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 8810d78cf..98ad661ca 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -75,10 +75,10 @@ template struct NumTraits > MulCost = 1 }; - EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; } - EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; } - EIGEN_DEVICE_FUNC static inline Real highest() { return n; } - EIGEN_DEVICE_FUNC static inline Real lowest() { return n; } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real epsilon() { return 0; } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real dummy_precision() { return 0; } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real highest() { return n; } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real lowest() { return n; } }; namespace internal { diff --git a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h index 8de3bbcab..6c95d0a6c 100644 --- a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h @@ -104,9 +104,9 @@ template<> struct h_skip_helper_type<0> template struct h_skip { template - constexpr static inline typename h_skip_helper_numeric::type helper(numeric_list) { return typename h_skip_helper_numeric::type(); } + constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_numeric::type helper(numeric_list) { return typename h_skip_helper_numeric::type(); } template - constexpr static inline typename h_skip_helper_type::type helper(type_list) { return typename h_skip_helper_type::type(); } + constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_type::type helper(type_list) { return typename h_skip_helper_type::type(); } }; template struct skip { typedef decltype(h_skip::helper(a())) type; }; @@ -268,7 +268,7 @@ template< typename Reducer > struct reduce { - EIGEN_DEVICE_FUNC constexpr static inline int run() { return Reducer::Identity; } + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE int run() { return Reducer::Identity; } }; template< @@ -276,7 +276,7 @@ template< typename A > struct reduce { - EIGEN_DEVICE_FUNC constexpr static inline A run(A a) { return a; } + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE A run(A a) { return a; } }; template< @@ -285,7 +285,7 @@ template< typename... Ts > struct reduce { - EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce::run(ts...))) { + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce::run(ts...))) { return Reducer::run(a, reduce::run(ts...)); } }; @@ -293,29 +293,29 @@ template< /* generic binary operations */ struct sum_op { - template EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } + template EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a + b) { return a + b; } static constexpr int Identity = 0; }; struct product_op { - template EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } + template EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a * b) { return a * b; } static constexpr int Identity = 1; }; -struct logical_and_op { template constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } }; -struct logical_or_op { template constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } }; +struct logical_and_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a && b) { return a && b; } }; +struct logical_or_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a || b) { return a || b; } }; -struct equal_op { template constexpr static inline auto run(A a, B b) -> decltype(a == b) { return a == b; } }; -struct not_equal_op { template constexpr static inline auto run(A a, B b) -> decltype(a != b) { return a != b; } }; -struct lesser_op { template constexpr static inline auto run(A a, B b) -> decltype(a < b) { return a < b; } }; -struct lesser_equal_op { template constexpr static inline auto run(A a, B b) -> decltype(a <= b) { return a <= b; } }; -struct greater_op { template constexpr static inline auto run(A a, B b) -> decltype(a > b) { return a > b; } }; -struct greater_equal_op { template constexpr static inline auto run(A a, B b) -> decltype(a >= b) { return a >= b; } }; +struct equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a == b) { return a == b; } }; +struct not_equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a != b) { return a != b; } }; +struct lesser_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a < b) { return a < b; } }; +struct lesser_equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a <= b) { return a <= b; } }; +struct greater_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a > b) { return a > b; } }; +struct greater_equal_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a >= b) { return a >= b; } }; /* generic unary operations */ -struct not_op { template constexpr static inline auto run(A a) -> decltype(!a) { return !a; } }; -struct negation_op { template constexpr static inline auto run(A a) -> decltype(-a) { return -a; } }; -struct greater_equal_zero_op { template constexpr static inline auto run(A a) -> decltype(a >= 0) { return a >= 0; } }; +struct not_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(!a) { return !a; } }; +struct negation_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(-a) { return -a; } }; +struct greater_equal_zero_op { template constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(a >= 0) { return a >= 0; } }; /* reductions for lists */ @@ -324,13 +324,13 @@ struct greater_equal_zero_op { template constexpr static inline auto // together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1 // does... template -EIGEN_DEVICE_FUNC constexpr inline decltype(reduce::run((*((Ts*)0))...)) arg_prod(Ts... ts) +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE decltype(reduce::run((*((Ts*)0))...)) arg_prod(Ts... ts) { return reduce::run(ts...); } template -constexpr inline decltype(reduce::run((*((Ts*)0))...)) arg_sum(Ts... ts) +constexpr EIGEN_STRONG_INLINE decltype(reduce::run((*((Ts*)0))...)) arg_sum(Ts... ts) { return reduce::run(ts...); } @@ -338,13 +338,13 @@ constexpr inline decltype(reduce::run((*((Ts*)0))...)) arg_sum(Ts /* reverse arrays */ template -constexpr inline Array h_array_reverse(Array arr, numeric_list) +constexpr EIGEN_STRONG_INLINE Array h_array_reverse(Array arr, numeric_list) { return {{array_get(arr)...}}; } template -constexpr inline array array_reverse(array arr) +constexpr EIGEN_STRONG_INLINE array array_reverse(array arr) { return h_array_reverse(arr, typename gen_numeric_list::type()); } @@ -359,7 +359,7 @@ constexpr inline array array_reverse(array arr) // an infinite loop) template struct h_array_reduce { - EIGEN_DEVICE_FUNC constexpr static inline auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) { return Reducer::run(h_array_reduce::run(arr, identity), array_get(arr)); } @@ -368,7 +368,7 @@ struct h_array_reduce { template struct h_array_reduce { - EIGEN_DEVICE_FUNC constexpr static inline T run(const array& arr, T) + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array& arr, T) { return array_get<0>(arr); } @@ -377,14 +377,14 @@ struct h_array_reduce template struct h_array_reduce { - EIGEN_DEVICE_FUNC constexpr static inline T run(const array&, T identity) + EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array&, T identity) { return identity; } }; template -EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_reduce(const array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) { return h_array_reduce::run(arr, identity); } @@ -392,13 +392,13 @@ EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array& arr, T i /* standard array reductions */ template -EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) { return array_reduce(arr, static_cast(0)); } template -EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) { return array_reduce(arr, static_cast(1)); } @@ -414,13 +414,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { /* zip an array */ template -constexpr inline array h_array_zip(array a, array b, numeric_list) +constexpr EIGEN_STRONG_INLINE array h_array_zip(array a, array b, numeric_list) { return array{{ Op::run(array_get(a), array_get(b))... }}; } template -constexpr inline array array_zip(array a, array b) +constexpr EIGEN_STRONG_INLINE array array_zip(array a, array b) { return h_array_zip(a, b, typename gen_numeric_list::type()); } @@ -428,13 +428,13 @@ constexpr inline array array_zip(array a, a /* zip an array and reduce the result */ template -constexpr inline auto h_array_zip_and_reduce(array a, array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) +constexpr EIGEN_STRONG_INLINE auto h_array_zip_and_reduce(array a, array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) { return reduce::type...>::run(Op::run(array_get(a), array_get(b))...); } template -constexpr inline auto array_zip_and_reduce(array a, array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) +constexpr EIGEN_STRONG_INLINE auto array_zip_and_reduce(array a, array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) { return h_array_zip_and_reduce(a, b, typename gen_numeric_list::type()); } @@ -442,13 +442,13 @@ constexpr inline auto array_zip_and_reduce(array a, array b) -> decl /* apply stuff to an array */ template -constexpr inline array h_array_apply(array a, numeric_list) +constexpr EIGEN_STRONG_INLINE array h_array_apply(array a, numeric_list) { return array{{ Op::run(array_get(a))... }}; } template -constexpr inline array array_apply(array a) +constexpr EIGEN_STRONG_INLINE array array_apply(array a) { return h_array_apply(a, typename gen_numeric_list::type()); } @@ -456,13 +456,13 @@ constexpr inline array array_apply(array a) /* apply stuff to an array and reduce */ template -constexpr inline auto h_array_apply_and_reduce(array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) +constexpr EIGEN_STRONG_INLINE auto h_array_apply_and_reduce(array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) { return reduce::type...>::run(Op::run(array_get(arr))...); } template -constexpr inline auto array_apply_and_reduce(array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) +constexpr EIGEN_STRONG_INLINE auto array_apply_and_reduce(array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) { return h_array_apply_and_reduce(a, typename gen_numeric_list::type()); } @@ -476,7 +476,7 @@ template struct h_repeat { template - constexpr static inline array run(t v, numeric_list) + constexpr static EIGEN_STRONG_INLINE array run(t v, numeric_list) { return {{ typename id_numeric::type(v)... }}; } diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h index 30cdf4751..ed6d83251 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h @@ -24,7 +24,7 @@ namespace Eigen { * \sa Eigen::igammac(), Eigen::lgamma() */ template -inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> igamma(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) { return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( @@ -47,7 +47,7 @@ igamma(const Eigen::ArrayBase& a, const Eigen::ArrayBase -inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> igamma_der_a(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) { return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( a.derived(), @@ -68,7 +68,7 @@ igamma_der_a(const Eigen::ArrayBase& a, const Eigen::ArrayBase -inline const Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived> +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived> gamma_sample_der_alpha(const Eigen::ArrayBase& alpha, const Eigen::ArrayBase& sample) { return Eigen::CwiseBinaryOp, const AlphaDerived, const SampleDerived>( alpha.derived(), @@ -86,7 +86,7 @@ gamma_sample_der_alpha(const Eigen::ArrayBase& alpha, const Eigen: * \sa Eigen::igamma(), Eigen::lgamma() */ template -inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> igammac(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) { return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( @@ -108,7 +108,7 @@ igammac(const Eigen::ArrayBase& a, const Eigen::ArrayBase -inline const Eigen::CwiseBinaryOp, const DerivedN, const DerivedX> +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const DerivedN, const DerivedX> polygamma(const Eigen::ArrayBase& n, const Eigen::ArrayBase& x) { return Eigen::CwiseBinaryOp, const DerivedN, const DerivedX>( @@ -128,7 +128,7 @@ polygamma(const Eigen::ArrayBase& n, const Eigen::ArrayBase& * \sa Eigen::betainc(), Eigen::lgamma() */ template -inline const Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived> +EIGEN_STRONG_INLINE const Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived> betainc(const Eigen::ArrayBase& a, const Eigen::ArrayBase& b, const Eigen::ArrayBase& x) { return Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived>( @@ -152,7 +152,7 @@ betainc(const Eigen::ArrayBase& a, const Eigen::ArrayBase -inline const Eigen::CwiseBinaryOp, const DerivedX, const DerivedQ> +EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp, const DerivedX, const DerivedQ> zeta(const Eigen::ArrayBase& x, const Eigen::ArrayBase& q) { return Eigen::CwiseBinaryOp, const DerivedX, const DerivedQ>( @@ -176,7 +176,7 @@ zeta(const Eigen::ArrayBase& x, const Eigen::ArrayBase& q) * \sa ArrayBase::i0e() */ template -inline const Eigen::CwiseUnaryOp< +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< Eigen::internal::scalar_i0e_op, const Derived> i0e(const Eigen::ArrayBase& x) { return Eigen::CwiseUnaryOp< @@ -199,7 +199,7 @@ i0e(const Eigen::ArrayBase& x) { * \sa ArrayBase::i1e() */ template -inline const Eigen::CwiseUnaryOp< +EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp< Eigen::internal::scalar_i1e_op, const Derived> i1e(const Eigen::ArrayBase& x) { return Eigen::CwiseUnaryOp< diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index 3a63dcdd6..c6fac91bb 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -155,11 +155,11 @@ struct functor_traits > { */ template struct scalar_lgamma_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::lgamma; return lgamma(a); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::plgamma(a); } }; template struct functor_traits > @@ -177,11 +177,11 @@ struct functor_traits > */ template struct scalar_digamma_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::digamma; return digamma(a); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } }; template struct functor_traits > @@ -199,11 +199,11 @@ struct functor_traits > */ template struct scalar_zeta_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& q) const { using numext::zeta; return zeta(x, q); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } }; template struct functor_traits > @@ -221,11 +221,11 @@ struct functor_traits > */ template struct scalar_polygamma_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& n, const Scalar& x) const { using numext::polygamma; return polygamma(n, x); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } }; template struct functor_traits > @@ -244,11 +244,11 @@ struct functor_traits > */ template struct scalar_erf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::erf; return erf(a); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perf(a); } }; template struct functor_traits > @@ -267,11 +267,11 @@ struct functor_traits > */ template struct scalar_erfc_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::erfc; return erfc(a); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perfc(a); } }; template struct functor_traits > @@ -291,12 +291,12 @@ struct functor_traits > template struct scalar_i0e_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op) - EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { using numext::i0e; return i0e(x); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { return internal::pi0e(x); } }; @@ -318,12 +318,12 @@ struct functor_traits > { template struct scalar_i1e_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op) - EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const { using numext::i1e; return i1e(x); } typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { return internal::pi1e(x); } }; -- cgit v1.2.3 From d7a84148483b1a11b993c037a2cea5b43f2c052f Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 11:56:30 +0100 Subject: Distinguishing between internal memory allocation/deallocation from explicit user memory allocation/deallocation. --- unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h | 8 ++++---- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h | 6 ++++++ unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h | 7 +++++++ .../Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 8 ++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 14 +++++++------- 6 files changed, 34 insertions(+), 13 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 0e4db46de..7b4d56e4b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -112,7 +112,7 @@ struct TensorEvaluator, Devi return false; } else { m_result = static_cast( - m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; } @@ -120,7 +120,7 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { if (m_result != NULL) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } } @@ -273,7 +273,7 @@ struct TensorEvaluator(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + m_result = static_cast(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; } @@ -281,7 +281,7 @@ struct TensorEvaluatorallocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + stream_->deallocate(buffer); + } virtual void* scratchpad() const { if (scratch_ == NULL) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 90fd99027..5a16ebe50 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -105,6 +105,14 @@ struct ThreadPoolDevice { internal::aligned_free(buffer); } + EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + deallocate(buffer); + } + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { ::memcpy(dst, src, n); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index fdb31928f..09f6f2067 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -115,7 +115,7 @@ struct TensorEvaluator, Device> #endif EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { const Index numValues = internal::array_prod(m_impl.dimensions()); - m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); + m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)); // Should initialize the memory in case we're dealing with non POD types. if (NumTraits::RequireInitialization) { for (Index i = 0; i < numValues; ++i) { @@ -129,7 +129,7 @@ struct TensorEvaluator, Device> return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_device.deallocate(m_buffer); + m_device.deallocate_temp(m_buffer); m_buffer = NULL; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 375fc0802..c1cbdebc6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -513,7 +513,7 @@ struct TensorEvaluator, !RunningOnGPU))) { bool need_assign = false; if (!data) { - m_result = static_cast(m_device.allocate(sizeof(CoeffReturnType))); + m_result = static_cast(m_device.allocate_temp(sizeof(CoeffReturnType))); data = m_result; need_assign = true; } @@ -525,7 +525,7 @@ struct TensorEvaluator, const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); if (!data) { - data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + data = static_cast(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } Op reducer(m_reducer); @@ -549,7 +549,7 @@ struct TensorEvaluator, const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); if (!data) { if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { - data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + data = static_cast(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } else { @@ -559,7 +559,7 @@ struct TensorEvaluator, Op reducer(m_reducer); if (internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { if (m_result) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } return true; @@ -582,7 +582,7 @@ struct TensorEvaluator, const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); if (!data) { if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { - data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + data = static_cast(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } else { @@ -592,7 +592,7 @@ struct TensorEvaluator, Op reducer(m_reducer); if (internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { if (m_result) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } return true; @@ -607,7 +607,7 @@ struct TensorEvaluator, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); if (m_result) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } } -- cgit v1.2.3 From 3a197a60e602ea0cd836438ab717810803dc9074 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 12:19:14 +0100 Subject: variadic version of assert which can take a parameter pack as its input. --- Eigen/src/Core/util/Macros.h | 24 ++++++++++++++++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 2 ++ 2 files changed, 26 insertions(+) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index b15819f7d..3255b8351 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1076,4 +1076,28 @@ namespace Eigen { # endif #endif +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +// Provide a variadic version of assert which can take a parameter pack as its input +// The eigen_assert macro used here might have been redefined to use other macros such as EIGEN_THROW, such as used in Eigen's test suite, therefore this needs to be defined after the other macros. +// Note that this does not provide as nice a string to assert as a straight forward call to eigen_assert, so we add a message to the assert. +#if defined(EIGEN_NO_DEBUG) +#define eigen_variadic_assert(x) +#else +namespace Eigen { +namespace internal { +inline void variadic_assert(const char*) {} +template inline void variadic_assert(const char* message, bool first, Bools ... others) { + eigen_assert(first && message); + variadic_assert(message, others...); + EIGEN_UNUSED_VARIABLE(first); +} +} +} +#define EIGEN_VARIADIC_ASSERT_MESSAGE(x) EIGEN_MAKESTRING(x) " in " __FILE__ ":" EIGEN_MAKESTRING(__LINE__) +#define eigen_variadic_assert(x) \ + do { Eigen::internal::variadic_assert(EIGEN_VARIADIC_ASSERT_MESSAGE(x), x); } while(false); +#endif +#endif + + #endif // EIGEN_MACROS_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index a8e55757e..f69f8f24a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -150,6 +150,7 @@ template class MakePoin EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const { EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_variadic_assert((Eigen::NumTraits::highest() >= otherIndices)...); if (PlainObjectType::Options&RowMajor) { const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); return m_data[index]; @@ -237,6 +238,7 @@ template class MakePoin EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) { static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + eigen_variadic_assert((Eigen::NumTraits::highest() >= otherIndices)...); const std::size_t NumDims = sizeof...(otherIndices) + 2; if (PlainObjectType::Options&RowMajor) { const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); -- cgit v1.2.3 From b512a9536f4b6260fd7af1d39f337eea8c6932cb Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 13:39:13 +0100 Subject: Enabling per device specialisation of packetsize. --- unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorConcatenation.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorConversion.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorCustomOp.h | 8 ++++---- unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 24 ++++++++++++---------- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorInflation.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 8 ++++---- .../Eigen/CXX11/src/Tensor/TensorStriding.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorVolumePatch.h | 2 +- 23 files changed, 48 insertions(+), 46 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index 199ddb123..f1f877c16 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -98,7 +98,7 @@ struct TensorEvaluator, Device> typedef typename PacketType::type PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; static const int NumDims = XprType::NumDims; enum { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 8fecbe657..b4a77b022 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -104,7 +104,7 @@ struct TensorEvaluator, Device> typedef typename TensorEvaluator::Dimensions InputDimensions; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; bool isCopy= false, nByOne = false, oneByN = false; enum { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 085c05f3d..3ab0a0f49 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -138,7 +138,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { @@ -339,7 +339,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 9f0321880..27c92d8f6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -251,7 +251,7 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int packetSize = internal::unpacket_traits::size; + const int packetSize = PacketType::size; EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); @@ -354,7 +354,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - const int packetSize = internal::unpacket_traits::size; + const int packetSize = PacketType::size; EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 86602c27e..e1649fb47 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -239,7 +239,7 @@ struct TensorContractionEvaluatorBase enum { IsAligned = true, - PacketAccess = (internal::unpacket_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index e0cbbb315..a7751eee1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -190,7 +190,7 @@ struct TensorEvaluator, Device> typedef typename internal::remove_all::Scalar>::type SrcType; typedef typename PacketType::type PacketReturnType; typedef typename PacketType::type PacketSourceType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 1ec5819a7..0d3ca966c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -302,7 +302,7 @@ struct TensorEvaluator::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 0e4db46de..571922073 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -87,11 +87,11 @@ struct TensorEvaluator, Devi typedef typename internal::remove_const::type Scalar; typedef typename internal::remove_const::type CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, - PacketAccess = (internal::packet_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -249,11 +249,11 @@ struct TensorEvaluator::type CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, - PacketAccess = (internal::packet_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index af39daa91..256d499f2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -102,7 +102,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename internal::remove_const::type CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = TensorEvaluator::IsAligned, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index f9a1bd68c..8f7a81575 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -33,6 +33,7 @@ struct TensorEvaluator typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; typedef Derived XprType; + static const int PacketSize = PacketType::size; // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? @@ -40,7 +41,7 @@ struct TensorEvaluator enum { IsAligned = Derived::IsAligned, - PacketAccess = (internal::unpacket_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = internal::is_arithmetic::type>::value, Layout = Derived::Layout, CoordAccess = NumCoords > 0, @@ -121,7 +122,7 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, - internal::unpacket_traits::size); + PacketType::size); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( @@ -188,10 +189,11 @@ struct TensorEvaluator // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? internal::traits::NumDimensions : 0; + static const int PacketSize = PacketType::size; enum { IsAligned = Derived::IsAligned, - PacketAccess = (internal::unpacket_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = internal::is_arithmetic::type>::value, Layout = Derived::Layout, CoordAccess = NumCoords > 0, @@ -249,7 +251,7 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, - internal::unpacket_traits::size); + PacketType::size); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( @@ -300,7 +302,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } @@ -322,7 +324,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, - internal::unpacket_traits::size); + PacketType::size); } EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } @@ -367,7 +369,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } @@ -445,7 +447,7 @@ struct TensorEvaluator::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; static const int NumDims = internal::array_size< @@ -574,7 +576,7 @@ struct TensorEvaluator::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const @@ -644,7 +646,7 @@ struct TensorEvaluator enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & - internal::packet_traits::HasBlend, + PacketType::HasBlend, BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -665,7 +667,7 @@ struct TensorEvaluator typedef typename XprType::Index Index; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index fdb31928f..5a6555cde 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -93,11 +93,11 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = true, - PacketAccess = (PacketSize > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index fa269b8c6..97c8d4a02 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -90,7 +90,7 @@ struct TensorEvaluator, Device> typedef typename PacketType::type PacketReturnType; enum { IsAligned = false, - PacketAccess = (internal::unpacket_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -137,7 +137,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int packetSize = internal::unpacket_traits::size; + const int packetSize = PacketType::size; EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < dimensions().TotalSize()); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 72cb2d15f..00e1186e5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -181,7 +181,7 @@ struct TensorEvaluator, Device> typedef TensorEvaluator Impl; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h index 6147fbdf1..64f2ad81f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -85,7 +85,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = /*TensorEvaluator::IsAligned*/ false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 498488649..9a6431f29 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -423,7 +423,7 @@ struct TensorEvaluator, Devi template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int packetSize = internal::unpacket_traits::size; + const int packetSize = PacketType::size; EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); @@ -584,7 +584,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - const int packetSize = internal::unpacket_traits::size; + const int packetSize = PacketType::size; Index inputIndices[] = {0, 0}; Index indices[] = {index, index + packetSize - 1}; if (static_cast(Layout) == static_cast(ColMajor)) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index ffa22f31e..aa1db3c73 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -91,7 +91,7 @@ struct TensorEvaluator, Device typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = true, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index 950ac32af..a0a1ad8f4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -88,7 +88,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 375fc0802..bc09d3699 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -407,7 +407,7 @@ struct TensorEvaluator, static const bool InputPacketAccess = TensorEvaluator::PacketAccess; typedef typename internal::remove_const::type CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index bb2768ab1..9193bdd8e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -108,7 +108,7 @@ struct TensorEvaluator, Device typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, @@ -266,7 +266,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return this->m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 39717efaa..b1135f297 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -95,7 +95,7 @@ struct TensorEvaluator, Device> { enum { IsAligned = false, - PacketAccess = (internal::unpacket_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 6b54f40ad..0fc49255d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -107,11 +107,11 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, - PacketAccess = (internal::packet_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -236,11 +236,11 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, - PacketAccess = (internal::packet_traits::size > 1), + PacketAccess = (PacketType::size > 1), BlockAccess = false, RawAccess = false }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index c09513c10..4b69072f2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -107,7 +107,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = /*TensorEvaluator::IsAligned*/false, @@ -287,7 +287,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h index ef199bfb6..3c7d8bbc0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -194,7 +194,7 @@ struct TensorEvaluator, D typedef typename internal::remove_const::type Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits::size; + static const int PacketSize = PacketType::size; enum { IsAligned = false, -- cgit v1.2.3 From af96018b499be64ff0b262cafc7b31f1a907b4c8 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 16:04:44 +0100 Subject: Using the suggested modification. --- Eigen/src/Core/util/Macros.h | 21 +++++---------------- unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 4 ++-- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 3255b8351..f59b93608 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1077,26 +1077,15 @@ namespace Eigen { #endif #ifdef EIGEN_HAS_VARIADIC_TEMPLATES -// Provide a variadic version of assert which can take a parameter pack as its input -// The eigen_assert macro used here might have been redefined to use other macros such as EIGEN_THROW, such as used in Eigen's test suite, therefore this needs to be defined after the other macros. -// Note that this does not provide as nice a string to assert as a straight forward call to eigen_assert, so we add a message to the assert. -#if defined(EIGEN_NO_DEBUG) -#define eigen_variadic_assert(x) -#else +// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input. namespace Eigen { namespace internal { -inline void variadic_assert(const char*) {} -template inline void variadic_assert(const char* message, bool first, Bools ... others) { - eigen_assert(first && message); - variadic_assert(message, others...); - EIGEN_UNUSED_VARIABLE(first); -} +bool all(){ return true; } +template +bool all(T t, Ts ... ts){ return t && all(ts...); } + } } -#define EIGEN_VARIADIC_ASSERT_MESSAGE(x) EIGEN_MAKESTRING(x) " in " __FILE__ ":" EIGEN_MAKESTRING(__LINE__) -#define eigen_variadic_assert(x) \ - do { Eigen::internal::variadic_assert(EIGEN_VARIADIC_ASSERT_MESSAGE(x), x); } while(false); -#endif #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index f69f8f24a..d1cc0593f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -150,7 +150,7 @@ template class MakePoin EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const { EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_variadic_assert((Eigen::NumTraits::highest() >= otherIndices)...); + eigen_assert(internal::all((Eigen::NumTraits::highest() >= otherIndices)...)); if (PlainObjectType::Options&RowMajor) { const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); return m_data[index]; @@ -238,7 +238,7 @@ template class MakePoin EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) { static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - eigen_variadic_assert((Eigen::NumTraits::highest() >= otherIndices)...); + eigen_assert(internal::all((Eigen::NumTraits::highest() >= otherIndices)...)); const std::size_t NumDims = sizeof...(otherIndices) + 2; if (PlainObjectType::Options&RowMajor) { const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); -- cgit v1.2.3 From c6a5c70712851cd696d7410579506fc299c04a05 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 16:56:26 +0100 Subject: Correcting the position of allocate_temp/deallocate_temp in TensorDeviceGpu.h --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h index 48bbd5e45..b490433db 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h @@ -159,13 +159,6 @@ class GpuStreamDevice : public StreamInterface { err = gpuFree(buffer); gpu_assert(err == gpuSuccess); } - EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { - return stream_->allocate(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { - stream_->deallocate(buffer); - } virtual void* scratchpad() const { if (scratch_ == NULL) { @@ -214,6 +207,15 @@ struct GpuDevice { stream_->deallocate(buffer); } + EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return stream_->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + stream_->deallocate(buffer); + } + + EIGEN_STRONG_INLINE void* scratchpad() const { return stream_->scratchpad(); } -- cgit v1.2.3