aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2015-11-20 17:42:50 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2015-11-20 17:42:50 -0800
commit9fa65d38383e203ba968de819697941096c86f03 (patch)
tree27e13ea7d775f36c0417ec44af24015464b72a6b /unsupported
parenta367804856cf3a39d9d43d10ec3ba2e335a8ec3a (diff)
Split TensorDeviceType.h in 3 files to make it more manageable
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/Tensor4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h (renamed from unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h)266
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h61
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h224
4 files changed, 290 insertions, 265 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 7e59af964..17c4325b8 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -60,7 +60,9 @@
#include "src/Tensor/TensorMacros.h"
#include "src/Tensor/TensorForwardDeclarations.h"
#include "src/Tensor/TensorMeta.h"
-#include "src/Tensor/TensorDeviceType.h"
+#include "src/Tensor/TensorDeviceDefault.h"
+#include "src/Tensor/TensorDeviceThreadPool.h"
+#include "src/Tensor/TensorDeviceCuda.h"
#include "src/Tensor/TensorIndexList.h"
#include "src/Tensor/TensorDimensionList.h"
#include "src/Tensor/TensorDimensions.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 71fd4a6af..7d80d0b91 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -7,272 +7,12 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
+#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
namespace Eigen {
-// Default device for the machine (typically a single cpu core)
-struct DefaultDevice {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
- return internal::aligned_malloc(num_bytes);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
- internal::aligned_free(buffer);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
- ::memcpy(dst, src, n);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
- memcpy(dst, src, n);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
- memcpy(dst, src, n);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
- ::memset(buffer, c, n);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
-#ifndef __CUDA_ARCH__
- // Running on the host CPU
- return 1;
-#else
- // Running on a CUDA device
- return 32;
-#endif
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifndef __CUDA_ARCH__
- // Running single threaded on the host CPU
- // Should return an enum that encodes the ISA supported by the CPU
- return 1;
-#else
- // Running on a CUDA device
- return __CUDA_ARCH__ / 100;
-#endif
- }
-};
-
-
-// Multiple cpu cores
-// We should really use a thread pool here but first we need to find a portable thread pool library.
-#ifdef EIGEN_USE_THREADS
-
-// This defines an interface that ThreadPoolDevice can take to use
-// custom thread pools underneath.
-class ThreadPoolInterface {
- public:
- virtual void Schedule(std::function<void()> fn) = 0;
-
- virtual ~ThreadPoolInterface() {}
-};
-
-// The implementation of the ThreadPool type ensures that the Schedule method
-// runs the functions it is provided in FIFO order when the scheduling is done
-// by a single thread.
-class ThreadPool : public ThreadPoolInterface {
- public:
- // Construct a pool that contains "num_threads" threads.
- explicit ThreadPool(int num_threads) {
- for (int i = 0; i < num_threads; i++) {
- threads_.push_back(new std::thread([this]() { WorkerLoop(); }));
- }
- }
-
- // Wait until all scheduled work has finished and then destroy the
- // set of threads.
- ~ThreadPool()
- {
- {
- // Wait for all work to get done.
- std::unique_lock<std::mutex> l(mu_);
- empty_.wait(l, [this]() { return pending_.empty(); });
- exiting_ = true;
-
- // Wakeup all waiters.
- for (auto w : waiters_) {
- w->ready = true;
- w->work = nullptr;
- w->cv.notify_one();
- }
- }
-
- // Wait for threads to finish.
- for (auto t : threads_) {
- t->join();
- delete t;
- }
- }
-
- // Schedule fn() for execution in the pool of threads. The functions are
- // executed in the order in which they are scheduled.
- void Schedule(std::function<void()> fn) {
- std::unique_lock<std::mutex> l(mu_);
- if (waiters_.empty()) {
- pending_.push_back(fn);
- } else {
- Waiter* w = waiters_.back();
- waiters_.pop_back();
- w->ready = true;
- w->work = fn;
- w->cv.notify_one();
- }
- }
-
- protected:
- void WorkerLoop() {
- std::unique_lock<std::mutex> l(mu_);
- Waiter w;
- while (!exiting_) {
- std::function<void()> fn;
- if (pending_.empty()) {
- // Wait for work to be assigned to me
- w.ready = false;
- waiters_.push_back(&w);
- w.cv.wait(l, [&w]() { return w.ready; });
- fn = w.work;
- w.work = nullptr;
- } else {
- // Pick up pending work
- fn = pending_.front();
- pending_.pop_front();
- if (pending_.empty()) {
- empty_.notify_all();
- }
- }
- if (fn) {
- mu_.unlock();
- fn();
- mu_.lock();
- }
- }
- }
-
- private:
- struct Waiter {
- std::condition_variable cv;
- std::function<void()> work;
- bool ready;
- };
-
- std::mutex mu_;
- std::vector<std::thread*> threads_; // All threads
- std::vector<Waiter*> waiters_; // Stack of waiting threads.
- std::deque<std::function<void()>> pending_; // Queue of pending work
- std::condition_variable empty_; // Signaled on pending_.empty()
- bool exiting_ = false;
-};
-
-
-// Notification is an object that allows a user to to wait for another
-// thread to signal a notification that an event has occurred.
-//
-// Multiple threads can wait on the same Notification object.
-// but only one caller must call Notify() on the object.
-class Notification {
- public:
- Notification() : notified_(false) {}
- ~Notification() {}
-
- void Notify() {
- std::unique_lock<std::mutex> l(mu_);
- eigen_assert(!notified_);
- notified_ = true;
- cv_.notify_all();
- }
-
- void WaitForNotification() {
- std::unique_lock<std::mutex> l(mu_);
- cv_.wait(l, [this]() { return notified_; } );
- }
-
- private:
- std::mutex mu_;
- std::condition_variable cv_;
- bool notified_;
-};
-
-// Runs an arbitrary function and then calls Notify() on the passed in
-// Notification.
-template <typename Function, typename... Args> struct FunctionWrapper
-{
- static void run(Notification* n, Function f, Args... args) {
- f(args...);
- n->Notify();
- }
-};
-
-static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) {
- if (n) {
- n->WaitForNotification();
- }
-}
-
-
-// Build a thread pool device on top the an existing pool of threads.
-struct ThreadPoolDevice {
- // The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
-
- EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
- return internal::aligned_malloc(num_bytes);
- }
-
- EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
- internal::aligned_free(buffer);
- }
-
- EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
- ::memcpy(dst, src, n);
- }
- EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
- memcpy(dst, src, n);
- }
- EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
- memcpy(dst, src, n);
- }
-
- EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
- ::memset(buffer, c, n);
- }
-
- EIGEN_STRONG_INLINE size_t numThreads() const {
- return num_threads_;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
- // Should return an enum that encodes the ISA supported by the CPU
- return 1;
- }
-
- template <class Function, class... Args>
- EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
- Notification* n = new Notification();
- std::function<void()> func =
- std::bind(&FunctionWrapper<Function, Args...>::run, n, f, args...);
- pool_->Schedule(func);
- return n;
- }
- template <class Function, class... Args>
- EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const {
- std::function<void()> func = std::bind(f, args...);
- pool_->Schedule(func);
- }
-
- private:
- ThreadPoolInterface* pool_;
- size_t num_threads_;
-};
-
-#endif
-
-
-// GPU offloading
-#ifdef EIGEN_USE_GPU
-
// This defines an interface that GPUDevice can take to use
// CUDA streams underneath.
class StreamInterface {
@@ -515,8 +255,6 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
}
#endif
-#endif
-
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
new file mode 100644
index 000000000..267f6f8e3
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
+
+
+namespace Eigen {
+
+// Default device for the machine (typically a single cpu core)
+struct DefaultDevice {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
+ return internal::aligned_malloc(num_bytes);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
+ internal::aligned_free(buffer);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
+ ::memset(buffer, c, n);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
+#ifndef __CUDA_ARCH__
+ // Running on the host CPU
+ return 1;
+#else
+ // Running on a CUDA device
+ return 32;
+#endif
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
+#ifndef __CUDA_ARCH__
+ // Running single threaded on the host CPU
+ // Should return an enum that encodes the ISA supported by the CPU
+ return 1;
+#else
+ // Running on a CUDA device
+ return __CUDA_ARCH__ / 100;
+#endif
+ }
+};
+
+} // namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
new file mode 100644
index 000000000..dcbef5b03
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -0,0 +1,224 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H)
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H
+
+namespace Eigen {
+
+// This defines an interface that ThreadPoolDevice can take to use
+// custom thread pools underneath.
+class ThreadPoolInterface {
+ public:
+ virtual void Schedule(std::function<void()> fn) = 0;
+
+ virtual ~ThreadPoolInterface() {}
+};
+
+// The implementation of the ThreadPool type ensures that the Schedule method
+// runs the functions it is provided in FIFO order when the scheduling is done
+// by a single thread.
+class ThreadPool : public ThreadPoolInterface {
+ public:
+ // Construct a pool that contains "num_threads" threads.
+ explicit ThreadPool(int num_threads) {
+ for (int i = 0; i < num_threads; i++) {
+ threads_.push_back(new std::thread([this]() { WorkerLoop(); }));
+ }
+ }
+
+ // Wait until all scheduled work has finished and then destroy the
+ // set of threads.
+ ~ThreadPool()
+ {
+ {
+ // Wait for all work to get done.
+ std::unique_lock<std::mutex> l(mu_);
+ empty_.wait(l, [this]() { return pending_.empty(); });
+ exiting_ = true;
+
+ // Wakeup all waiters.
+ for (auto w : waiters_) {
+ w->ready = true;
+ w->work = nullptr;
+ w->cv.notify_one();
+ }
+ }
+
+ // Wait for threads to finish.
+ for (auto t : threads_) {
+ t->join();
+ delete t;
+ }
+ }
+
+ // Schedule fn() for execution in the pool of threads. The functions are
+ // executed in the order in which they are scheduled.
+ void Schedule(std::function<void()> fn) {
+ std::unique_lock<std::mutex> l(mu_);
+ if (waiters_.empty()) {
+ pending_.push_back(fn);
+ } else {
+ Waiter* w = waiters_.back();
+ waiters_.pop_back();
+ w->ready = true;
+ w->work = fn;
+ w->cv.notify_one();
+ }
+ }
+
+ protected:
+ void WorkerLoop() {
+ std::unique_lock<std::mutex> l(mu_);
+ Waiter w;
+ while (!exiting_) {
+ std::function<void()> fn;
+ if (pending_.empty()) {
+ // Wait for work to be assigned to me
+ w.ready = false;
+ waiters_.push_back(&w);
+ w.cv.wait(l, [&w]() { return w.ready; });
+ fn = w.work;
+ w.work = nullptr;
+ } else {
+ // Pick up pending work
+ fn = pending_.front();
+ pending_.pop_front();
+ if (pending_.empty()) {
+ empty_.notify_all();
+ }
+ }
+ if (fn) {
+ mu_.unlock();
+ fn();
+ mu_.lock();
+ }
+ }
+ }
+
+ private:
+ struct Waiter {
+ std::condition_variable cv;
+ std::function<void()> work;
+ bool ready;
+ };
+
+ std::mutex mu_;
+ std::vector<std::thread*> threads_; // All threads
+ std::vector<Waiter*> waiters_; // Stack of waiting threads.
+ std::deque<std::function<void()>> pending_; // Queue of pending work
+ std::condition_variable empty_; // Signaled on pending_.empty()
+ bool exiting_ = false;
+};
+
+
+// Notification is an object that allows a user to to wait for another
+// thread to signal a notification that an event has occurred.
+//
+// Multiple threads can wait on the same Notification object.
+// but only one caller must call Notify() on the object.
+class Notification {
+ public:
+ Notification() : notified_(false) {}
+ ~Notification() {}
+
+ void Notify() {
+ std::unique_lock<std::mutex> l(mu_);
+ eigen_assert(!notified_);
+ notified_ = true;
+ cv_.notify_all();
+ }
+
+ void WaitForNotification() {
+ std::unique_lock<std::mutex> l(mu_);
+ cv_.wait(l, [this]() { return notified_; } );
+ }
+
+ private:
+ std::mutex mu_;
+ std::condition_variable cv_;
+ bool notified_;
+};
+
+// Runs an arbitrary function and then calls Notify() on the passed in
+// Notification.
+template <typename Function, typename... Args> struct FunctionWrapper
+{
+ static void run(Notification* n, Function f, Args... args) {
+ f(args...);
+ n->Notify();
+ }
+};
+
+static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) {
+ if (n) {
+ n->WaitForNotification();
+ }
+}
+
+
+// Build a thread pool device on top the an existing pool of threads.
+struct ThreadPoolDevice {
+ // The ownership of the thread pool remains with the caller.
+ ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
+
+ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
+ return internal::aligned_malloc(num_bytes);
+ }
+
+ EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
+ internal::aligned_free(buffer);
+ }
+
+ EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+
+ EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
+ ::memset(buffer, c, n);
+ }
+
+ EIGEN_STRONG_INLINE size_t numThreads() const {
+ return num_threads_;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
+ // Should return an enum that encodes the ISA supported by the CPU
+ return 1;
+ }
+
+ template <class Function, class... Args>
+ EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
+ Notification* n = new Notification();
+ std::function<void()> func =
+ std::bind(&FunctionWrapper<Function, Args...>::run, n, f, args...);
+ pool_->Schedule(func);
+ return n;
+ }
+ template <class Function, class... Args>
+ EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const {
+ std::function<void()> func = std::bind(f, args...);
+ pool_->Schedule(func);
+ }
+
+ private:
+ ThreadPoolInterface* pool_;
+ size_t num_threads_;
+};
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H