author Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-08 16:37:27 -0800
committer Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-08 16:37:27 -0800
commit 46177c8d648a27d82d34cebed7e2b5bc59d441fc (patch)
tree 97a356d04f124ea1ad32eda38e76c607e6b33e5e /unsupported
parent 6d6413f76832a094d0835770af2adfaabba24738 (diff)
Replace std::vector with our own implementation, as using the STL when compiling with nvcc and AVX enabled leads to many issues.
Diffstat (limited to 'unsupported')
-rw-r--r--  unsupported/Eigen/CXX11/Core                                      |   1
-rw-r--r--  unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h            | 130
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h |  16
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h              |   3
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h             |  10
5 files changed, 143 insertions(+), 17 deletions(-)
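
The migration is the same at every call site below: a std::vector whose capacity is known up front, built with reserve() plus push_back(), becomes a MaxSizeVector whose capacity is fixed at construction. A minimal usage sketch of the replacement (not part of the commit; assumes the unsupported CXX11 Core module is on the include path, which per the first hunk pulls in MaxSizeVector.h):

    #include <unsupported/Eigen/CXX11/Core>
    #include <iostream>

    int main() {
      // The capacity std::vector callers passed to reserve() is instead
      // fixed at construction; push_back past it is an assertion failure,
      // never a reallocation.
      Eigen::MaxSizeVector<int> results(3);
      results.push_back(10);
      results.push_back(20);
      std::cout << results.size() << " " << results.back() << "\n";  // prints "2 20"
      return 0;
    }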
diff --git a/unsupported/Eigen/CXX11/Core b/unsupported/Eigen/CXX11/Core
index e3e2cb60c..946145f5a 100644
--- a/unsupported/Eigen/CXX11/Core
+++ b/unsupported/Eigen/CXX11/Core
@@ -33,6 +33,7 @@
#include <vector>
#include "src/Core/util/EmulateArray.h"
+#include "src/Core/util/MaxSizeVector.h"
// Emulate the cxx11 functionality that we need if the compiler doesn't support it.
// Visual studio 2015 doesn't advertise itself as cxx11 compliant, although it
diff --git a/unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h
new file mode 100644
index 000000000..551124bae
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FIXEDSIZEVECTOR_H
+#define EIGEN_FIXEDSIZEVECTOR_H
+
+namespace Eigen {
+
+/** \class MaxSizeVector
+ * \ingroup Core
+ *
+ * \brief The MaxSizeVector class.
+ *
+ * The %MaxSizeVector provides a subset of std::vector functionality.
+ *
+ * The goal is to provide basic std::vector operations when using
+ * std::vector is not an option (e.g. on GPU or when compiling using
+ * FMA/AVX, as this can cause either compilation failures or illegal
+ * instruction failures).
+ *
+ * Beware: The constructors are not API compatible with those of
+ * std::vector.
+ */
+template <typename T>
+class MaxSizeVector {
+ public:
+ // Construct a new MaxSizeVector, reserve n elements.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit MaxSizeVector(size_t n)
+ : reserve_(n), size_(0),
+ data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
+ for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
+ }
+
+ // Construct a new MaxSizeVector, reserve and resize to n.
+ // Copy the init value to all elements.
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit MaxSizeVector(size_t n, const T& init)
+ : reserve_(n), size_(n),
+ data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
+ for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ ~MaxSizeVector() {
+ for (size_t i = 0; i < size_; ++i) {
+ data_[i].~T();
+ }
+ internal::aligned_free(data_);
+ }
+
+ // Append new elements (up to reserved size).
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void push_back(const T& t) {
+ eigen_assert(size_ < reserve_);
+ data_[size_++] = t;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const T& operator[] (size_t i) const {
+ eigen_assert(i < size_);
+ return data_[i];
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ T& operator[] (size_t i) {
+ eigen_assert(i < size_);
+ return data_[i];
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ T& back() {
+ eigen_assert(size_ > 0);
+ return data_[size_ - 1];
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const T& back() const {
+ eigen_assert(size_ > 0);
+ return data_[size_ - 1];
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void pop_back() {
+ // NOTE: This does not destroy the value at the end the way
+ // std::vector's version of pop_back() does. That happens when
+ // the Vector is destroyed.
+ eigen_assert(size_ > 0);
+ size_--;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ size_t size() const { return size_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ bool empty() const { return size_ == 0; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ T* data() { return data_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const T* data() const { return data_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ T* begin() { return data_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ T* end() { return data_ + size_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const T* begin() const { return data_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const T* end() const { return data_ + size_; }
+
+ private:
+ size_t reserve_;
+ size_t size_;
+ T* data_;
+};
+
+} // namespace Eigen
+
+#endif // EIGEN_FIXEDSIZEVECTOR_H
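
Two semantic details of the class as committed are easy to miss: the single-argument constructor default-constructs all n slots even though size() stays 0, and pop_back() defers destruction to ~MaxSizeVector(), as the NOTE above states, which in turn destroys only the first size() slots. A hedged sketch (Probe is a hypothetical type, not from the commit):

    #include <unsupported/Eigen/CXX11/Core>
    #include <cstdio>

    struct Probe {
      Probe()  { std::printf("ctor\n"); }
      ~Probe() { std::printf("dtor\n"); }
    };

    int main() {
      Eigen::MaxSizeVector<Probe> v(2);  // prints "ctor" twice: every slot is default-constructed
      Probe p;                           // prints "ctor"
      v.push_back(p);                    // copy-assigns into slot 0; nothing printed
      v.pop_back();                      // size() drops back to 0; no "dtor" printed here
      return 0;
    }  // ~MaxSizeVector destroys only the first size() == 0 slots, then frees the memory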
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index 02b3c6dea..9044454fd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -28,7 +28,7 @@ struct packLhsArg {
template<typename LhsScalar, typename RhsScalar, typename RhsMapper, typename OutputMapper, typename Index>
struct packRhsAndKernelArg {
- const std::vector<LhsScalar*>* blockAs;
+ const MaxSizeVector<LhsScalar*>* blockAs;
RhsScalar* blockB;
const RhsMapper& rhs;
OutputMapper& output;
@@ -46,8 +46,8 @@ struct packRhsAndKernelArg {
const Index n_block_idx;
const Index m_blocks;
const Index n_blocks;
- std::vector<Notification*>* kernel_notifications;
- const std::vector<Notification*>* lhs_notifications;
+ MaxSizeVector<Notification*>* kernel_notifications;
+ const MaxSizeVector<Notification*>* lhs_notifications;
const bool need_to_pack;
};
@@ -202,8 +202,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// the alignment requirements with the assumption that
// (Traits::mr * sizeof(ResScalar)) % 16 == 0
const Index numBlockAs = numext::mini(num_threads, m_blocks);
- std::vector<LhsScalar *> blockAs;
- blockAs.reserve(num_threads);
+ MaxSizeVector<LhsScalar *> blockAs(num_threads);
for (int i = 0; i < num_threads; i++) {
blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))));
}
@@ -212,18 +211,17 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful.
// Other options: (1) reuse memory when a thread finishes. con: tricky
// (2) allocate block B memory in each thread. con: overhead
- std::vector<RhsScalar *> blockBs;
- blockBs.reserve(n_blocks);
+ MaxSizeVector<RhsScalar *> blockBs(n_blocks);
for (int i = 0; i < n_blocks; i++) {
blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))));
}
// lhs_notifications starts with all null Notifications
- std::vector<Notification*> lhs_notifications(num_threads, nullptr);
+ MaxSizeVector<Notification*> lhs_notifications(num_threads, nullptr);
// this should really be numBlockAs * n_blocks;
const Index num_kernel_notifications = num_threads * n_blocks;
- std::vector<Notification*> kernel_notifications(num_kernel_notifications,
+ MaxSizeVector<Notification*> kernel_notifications(num_kernel_notifications,
nullptr);
for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) {
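
Note the two constructor forms in play above: blockAs(num_threads) only reserves (size() == 0, elements added via push_back), while lhs_notifications(num_threads, nullptr) reserves and fills, matching the std::vector(n, value) fill constructor it replaces. A hedged sketch of the difference (not part of the commit):

    #include <unsupported/Eigen/CXX11/Core>
    #include <cassert>

    int main() {
      // One-argument form: reserve only; unlike std::vector(n), size() == 0.
      Eigen::MaxSizeVector<int*> reserved(4);
      assert(reserved.size() == 0);

      // Two-argument form: reserve and fill; size() == 4, like std::vector(4, nullptr).
      Eigen::MaxSizeVector<int*> filled(4, nullptr);
      assert(filled.size() == 4 && filled[0] == nullptr);
      return 0;
    }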
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index fd9919829..54da77bcf 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -127,8 +127,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
const Index numblocks = size / blocksize;
- std::vector<Notification*> results;
- results.reserve(numblocks);
+ MaxSizeVector<Notification*> results(numblocks);
for (int i = 0; i < numblocks; ++i) {
results.push_back(device.enqueue(&EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize));
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 875155243..2d7fb80d4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -256,9 +256,8 @@ struct FullReducer<Self, Op, ThreadPoolDevice, false> {
const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
eigen_assert(num_coeffs >= numblocks * blocksize);
- std::vector<Notification*> results;
- results.reserve(numblocks);
- std::vector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
+ MaxSizeVector<Notification*> results(numblocks);
+ MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
for (Index i = 0; i < numblocks; ++i) {
results.push_back(
device.enqueue(&FullReducerShard<Self, Op, false>::run, self,
@@ -308,9 +307,8 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
eigen_assert(num_coeffs >= numblocks * blocksize);
- std::vector<Notification*> results;
- results.reserve(numblocks);
- std::vector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
+ MaxSizeVector<Notification*> results(numblocks);
+ MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
for (Index i = 0; i < numblocks; ++i) {
results.push_back(device.enqueue(&FullReducerShard<Self, Op, true>::run,
self, i * blocksize, blocksize, reducer,