From f7362772e3236cdb8ae4d9be175f83a0b19902a0 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 24 Dec 2015 21:15:38 -0800 Subject: Add digamma for CPU + CUDA. Includes tests. --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 392acf302..cca716d6f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -128,6 +128,12 @@ class TensorBase return unaryExpr(internal::scalar_lgamma_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + digamma() const { + return unaryExpr(internal::scalar_digamma_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> erf() const { -- cgit v1.2.3 From 63fb66f53a576e4ae7bd6b28d011a7e33b7757de Mon Sep 17 00:00:00 2001 From: Ville Kallioniemi Date: Sun, 17 Jan 2016 21:25:36 -0700 Subject: Add ctor for long --- unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h index 4f2adb671..19352eb5e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -40,6 +40,12 @@ struct TensorUInt128 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TensorUInt128(unsigned int x) : high(0), low(x) { } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(long x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(unsigned long x) : high(0), low(x) { } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TensorUInt128(int64_t x) : high(0), low(x) { eigen_assert(x >= 0); } -- cgit v1.2.3 From 2832175a689313ba08523489a1a1b8bb6458ac5c 
Mon Sep 17 00:00:00 2001 From: Ville Kallioniemi Date: Tue, 19 Jan 2016 20:12:17 -0700 Subject: Use explicitly 32 bit integer types in constructors. --- unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h index 19352eb5e..f43f64cde 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -34,17 +34,11 @@ struct TensorUInt128 LOW low; EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(int x) : high(0), low(x) { + TensorUInt128(int32_t x) : high(0), low(x) { eigen_assert(x >= 0); } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(unsigned int x) : high(0), low(x) { } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(long x) : high(0), low(x) { - eigen_assert(x >= 0); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(unsigned long x) : high(0), low(x) { } + TensorUInt128(uint32_t x) : high(0), low(x) { } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TensorUInt128(int64_t x) : high(0), low(x) { eigen_assert(x >= 0); -- cgit v1.2.3 From 3aeeca32af00b1921b4424d7be2e03bbaeaa05b4 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 22 Jan 2016 16:36:30 -0800 Subject: Leverage the new blocking code in the tensor contraction code. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 4 +--- unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h | 5 +++-- unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 624e814e2..e6a008ba7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -582,10 +582,8 @@ struct TensorEvaluator BlockingType; - // Sizes of the blocks to load in cache. See the Goto paper for details. - BlockingType blocking(m, n, k, 1, true); + internal::TensorContractionBlocking blocking(k, m, n, 1); const Index kc = blocking.kc(); const Index mc = numext::mini(m, blocking.mc()); const Index nc = numext::mini(n, blocking.nc()); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index 9b6d18090..63c8ae126 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -426,15 +426,16 @@ class TensorContractionSubMapper { }; -template class TensorContractionInputMapper - : public BaseTensorContractionMapper { + : public BaseTensorContractionMapper { public: + typedef Scalar_ Scalar; typedef BaseTensorContractionMapper Base; typedef TensorContractionSubMapper SubMapper; typedef SubMapper VectorMapper; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 576bea295..51a3b9490 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -176,10 +176,10 @@ struct TensorEvaluatorm_device.numThreads(); - Index 
mc = m; - Index nc = n; - Index kc = k; - internal::computeProductBlockingSizes(kc, mc, nc, num_threads); + internal::TensorContractionBlocking blocking(k, m, n, num_threads); + Index mc = blocking.mc(); + Index nc = blocking.nc(); + Index kc = blocking.kc(); eigen_assert(mc <= m); eigen_assert(nc <= n); eigen_assert(kc <= k); -- cgit v1.2.3 From bd207ce11e8133874d5a12573921ea93874a0f9e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 24 Jan 2016 20:36:05 -0800 Subject: Added missing EIGEN_DEVICE_FUNC qualifier --- unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index e7daf7304..bd83d5de8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -136,7 +136,7 @@ struct TensorEvaluator, Device> } template - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return internal::ploadt(m_buffer + index); } -- cgit v1.2.3 From e3a15a03a4fe758ed0a00f3a2b083d7ca58ca16b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 24 Jan 2016 23:04:50 -0800 Subject: Don't explicitly evaluate the subexpression from TensorForcedEval::evalSubExprsIfNeeded, as it will be done when executing the EvalTo subexpression --- unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index c9b0b2f28..58b864787 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -106,7 +106,6 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC const Dimensions& 
dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_impl.evalSubExprsIfNeeded(NULL); const Index numValues = m_impl.dimensions().TotalSize(); m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); // Should initialize the memory in case we're dealing with non POD types. @@ -119,7 +118,6 @@ struct TensorEvaluator, Device> EvalTo evalToTmp(m_buffer, m_op); const bool PacketAccess = internal::IsVectorizable::value; internal::TensorExecutor::run(evalToTmp, m_device); - m_impl.cleanup(); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { -- cgit v1.2.3 From 291069e885dccad6059e4bda34aad30ab69cbd85 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 27 Jan 2016 15:37:03 -0800 Subject: Fixed some compilation problems with nvcc + clang --- Eigen/src/Core/util/Memory.h | 6 +++--- unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 823e077af..415bc48cb 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -526,9 +526,9 @@ template EIGEN_DEVICE_FUNC inline void conditional_align template EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) { - static const Index ScalarSize = sizeof(Scalar); - static const Index AlignmentSize = Alignment / ScalarSize; - static const Index AlignmentMask = AlignmentSize-1; + const Index ScalarSize = sizeof(Scalar); + const Index AlignmentSize = Alignment / ScalarSize; + const Index AlignmentMask = AlignmentSize-1; if(AlignmentSize<=1) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 09ee0c2c6..7a5dfbfea 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -346,7 +346,7 @@ struct InnerReducer { static const bool HasOptimizedImplementation = false; static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { - assert(false && "Not implemented"); + eigen_assert(false && "Not implemented"); } }; @@ -356,7 +356,7 @@ struct OuterReducer { static const bool HasOptimizedImplementation = false; static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { - assert(false && "Not implemented"); + eigen_assert(false && "Not implemented"); } }; -- cgit v1.2.3 From 4bf9eaf77aa8c9a75b5d60c781d5d86b833b93d1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 27 Jan 2016 17:09:30 -0800 Subject: Deleted an invalid assertion that prevented the assignment of empty tensors. --- unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h | 1 - unsupported/test/CMakeLists.txt | 1 + unsupported/test/cxx11_tensor_empty.cpp | 36 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 unsupported/test/cxx11_tensor_empty.cpp (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index 98631fc7f..18a916e46 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -105,7 +105,6 @@ class TensorStorage, Options_> EIGEN_DEVICE_FUNC void resize(Index size, const array& nbDimensions) { - eigen_assert(size >= 1); const Index currentSz = internal::array_prod(m_dimensions); if(size != currentSz) { diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 3a90a5371..d70bf2b88 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -147,6 +147,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_sugar "-std=c++0x") 
ei_add_test(cxx11_tensor_fft "-std=c++0x") ei_add_test(cxx11_tensor_ifft "-std=c++0x") + ei_add_test(cxx11_tensor_empty "-std=c++0x") endif() diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp new file mode 100644 index 000000000..ca03a297c --- /dev/null +++ b/unsupported/test/cxx11_tensor_empty.cpp @@ -0,0 +1,36 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + + +static void test_empty_tensor() +{ + Tensor source; + Tensor tgt1 = source; + Tensor tgt2; + tgt2 = source; +} + +static void test_empty_fixed_size_tensor() +{ + TensorFixedSize> source; + TensorFixedSize> tgt1 = source; + TensorFixedSize> tgt2; + tgt2 = source; +} + + +void test_cxx11_tensor_empty() +{ + CALL_SUBTEST(test_empty_tensor()); + CALL_SUBTEST(test_empty_fixed_size_tensor()); +} -- cgit v1.2.3 From c5d25bf1d014f7ef87d55901b591d24a32ee8f4a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 28 Jan 2016 23:15:45 -0800 Subject: Fixed a couple of compilation warnings. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 7a5dfbfea..a03b52629 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -345,7 +345,7 @@ template struct InnerReducer { static const bool HasOptimizedImplementation = false; - static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + EIGEN_DEVICE_FUNC static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { eigen_assert(false && "Not implemented"); } }; @@ -355,7 +355,7 @@ template struct OuterReducer { static const bool HasOptimizedImplementation = false; - static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + EIGEN_DEVICE_FUNC static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { eigen_assert(false && "Not implemented"); } }; -- cgit v1.2.3 From 963f2d2a8f33eebf90b3ae1944423aa875281469 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 28 Jan 2016 23:37:48 -0800 Subject: Marked several methods EIGEN_DEVICE_FUNC --- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e6a008ba7..1adb68894 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -378,7 +378,7 @@ struct TensorContractionEvaluatorBase } template - void evalGemv(Scalar* buffer) const { + EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const { const Index rows = m_i_size; const Index cols = m_k_size; @@ -516,7 +516,7 @@ struct TensorEvaluator - void evalProduct(Scalar* buffer) const { + EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { if (this->m_j_size == 1) { this->template evalGemv(buffer); return; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h index 78ed5038f..3d3f6904f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -28,7 +28,7 @@ class TensorContractionBlocking { typedef typename LhsMapper::Scalar LhsScalar; typedef typename RhsMapper::Scalar RhsScalar; - TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : + EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : kc_(k), mc_(m), nc_(n) { if (ShardingType == ShardByCol) { @@ -41,9 +41,9 @@ class TensorContractionBlocking { } } - EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } - EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } - EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } private: Index kc_; -- cgit v1.2.3 From 6720b38fbf60d750393af7d63777b06438ba5d81 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 31 Jan 2016 16:48:50 -0800 Subject: Fixed a few compilation warnings --- unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h | 5 ++++- unsupported/test/cxx11_tensor_empty.cpp | 12 ++++++++---- 2 files changed, 12 insertions(+), 5 
deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index 18a916e46..ed933b6ac 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -41,7 +41,10 @@ class TensorStorage private: static const std::size_t Size = FixedDimensions::total_size; - EIGEN_ALIGN_MAX T m_data[Size]; + // Allocate an array of size at least one to prevent compiler warnings. + static const std::size_t MinSize = max_n_1::size; + EIGEN_ALIGN_MAX T m_data[MinSize]; + FixedDimensions m_dimensions; public: diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp index ca03a297c..9130fff35 100644 --- a/unsupported/test/cxx11_tensor_empty.cpp +++ b/unsupported/test/cxx11_tensor_empty.cpp @@ -16,16 +16,20 @@ static void test_empty_tensor() { Tensor source; Tensor tgt1 = source; - Tensor tgt2; - tgt2 = source; + Tensor tgt2(source); + Tensor tgt3; + tgt3 = tgt1; + tgt3 = tgt2; } static void test_empty_fixed_size_tensor() { TensorFixedSize> source; TensorFixedSize> tgt1 = source; - TensorFixedSize> tgt2; - tgt2 = source; + TensorFixedSize> tgt2(source); + TensorFixedSize> tgt3; + tgt3 = tgt1; + tgt3 = tgt2; } -- cgit v1.2.3 From e80ed948e14c2de929a97bfbacab0b3a9172a59e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 31 Jan 2016 20:09:41 -0800 Subject: Fixed a number of compilation warnings generated by the cuda tests --- .../Eigen/CXX11/src/Core/util/EmulateArray.h | 39 ++++++++++++++++++++-- .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 8 ++--- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 4 +-- 3 files changed, 42 insertions(+), 9 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h index 456b34d0b..89aeb03e7 100644 --- 
a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h @@ -25,6 +25,16 @@ template class array { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& front() { return values[0]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& front() const { return values[0]; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& back() { return values[n-1]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static std::size_t size() { return n; } @@ -123,13 +133,33 @@ template class array { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& operator[] (size_t) { eigen_assert(false && "Can't index a zero size array"); - return *static_cast(NULL); + return dummy; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator[] (size_t) const { eigen_assert(false && "Can't index a zero size array"); - return *static_cast(NULL); + return dummy; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& front() { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& front() const { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& back() { + eigen_assert(false && "Can't index a zero size array"); + return dummy; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& back() const { + eigen_assert(false && "Can't index a zero size array"); + return dummy; } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } @@ -142,6 +172,9 @@ template class array { eigen_assert(l.size() == 0); } #endif + + private: + T dummy; }; namespace internal { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 367a152a0..67c797802 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -21,7 +21,7 @@ namespace Eigen { */ namespace internal { -template +template class IndexMapper { public: IndexMapper(const InputDims& input_dims, const array& kernel_dims, @@ -123,7 +123,7 @@ class IndexMapper { } inputIndex += p * m_inputStrides[NumKernelDims]; } else { - int limit = 0; + std::ptrdiff_t limit = 0; if (NumKernelDims < NumDims) { limit = NumDims - NumKernelDims - 1; } @@ -147,7 +147,7 @@ class IndexMapper { } outputIndex += p * m_outputStrides[NumKernelDims]; } else { - int limit = 0; + std::ptrdiff_t limit = 0; if (NumKernelDims < NumDims) { limit = NumDims - NumKernelDims - 1; } @@ -206,7 +206,7 @@ class IndexMapper { } private: - static const size_t NumDims = internal::array_size::value; + static const int NumDims = internal::array_size::value; array m_inputStrides; array m_outputStrides; array m_cudaInputStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index a03b52629..22aea5ea4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -463,7 +463,7 @@ struct TensorEvaluator, Device> m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; } } else { - m_outputStrides[NumOutputDims - 1] = 1; + m_outputStrides.back() = 1; for (int i = NumOutputDims - 2; i >= 0; --i) { m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; } @@ -479,7 +479,7 @@ struct TensorEvaluator, Device> input_strides[i] = input_strides[i-1] * input_dims[i-1]; } } else { - input_strides[NumInputDims - 1] = 1; + input_strides.back() = 1; for (int i = NumInputDims - 2; i >= 0; --i) { input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; } -- cgit v1.2.3 From 6b5dff875e4ba2235f255b7cf0a86b7abed21df0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 1 Feb 2016 12:46:32 -0800 
Subject: Made it possible to limit the number of blocks that will be used to evaluate a tensor expression on a CUDA device. This makes it possible to set aside streaming multiprocessors for other computations. --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 12 +++++++++--- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 5abdc489b..e684ab8f7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface { struct GpuDevice { // The StreamInterface is not owned: the caller is // responsible for its initialization and eventual destruction. - explicit GpuDevice(const StreamInterface* stream) : stream_(stream) { + explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { + eigen_assert(stream); + } + explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { eigen_assert(stream); } - // TODO(bsteiner): This is an internal API, we should not expose it. EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return stream_->stream(); @@ -246,6 +248,10 @@ struct GpuDevice { #endif } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { + return max_blocks_; + } + // This function checks if the CUDA runtime recorded an error for the // underlying stream device. 
inline bool ok() const { @@ -259,7 +265,7 @@ struct GpuDevice { private: const StreamInterface* stream_; - + int max_blocks_; }; #ifndef __CUDA_ARCH__ diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index d2ab70f2b..df15c6204 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor::run( if (needs_assign) { const int block_size = device.maxCudaThreadsPerBlock(); - const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; + const int max_blocks = numext::maxi(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size); const Index size = array_prod(evaluator.dimensions()); // Create a least one block to ensure we won't crash if we're called with tensors of size 0. const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); @@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor::run(c if (needs_assign) { const int block_size = device.maxCudaThreadsPerBlock(); - const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; + const int max_blocks = numext::maxi(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size); const Index size = array_prod(evaluator.dimensions()); // Create a least one block to ensure we won't crash if we're called with tensors of size 0. const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); -- cgit v1.2.3