diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-11-20 17:42:50 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-11-20 17:42:50 -0800 |
commit | 9fa65d38383e203ba968de819697941096c86f03 (patch) | |
tree | 27e13ea7d775f36c0417ec44af24015464b72a6b /unsupported/Eigen/CXX11 | |
parent | a367804856cf3a39d9d43d10ec3ba2e335a8ec3a (diff) |
Split TensorDeviceType.h in 3 files to make it more manageable
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r-- | unsupported/Eigen/CXX11/Tensor | 4 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h (renamed from unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h) | 266 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h | 61 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 224 |
4 files changed, 290 insertions, 265 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 7e59af964..17c4325b8 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -60,7 +60,9 @@ #include "src/Tensor/TensorMacros.h" #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" -#include "src/Tensor/TensorDeviceType.h" +#include "src/Tensor/TensorDeviceDefault.h" +#include "src/Tensor/TensorDeviceThreadPool.h" +#include "src/Tensor/TensorDeviceCuda.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" #include "src/Tensor/TensorDimensions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 71fd4a6af..7d80d0b91 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -7,272 +7,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H namespace Eigen { -// Default device for the machine (typically a single cpu core) -struct DefaultDevice { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return 1; -#else - // Running on a CUDA device - return 32; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - // Should return an enum that encodes the ISA supported by the CPU - return 1; -#else - // Running on a CUDA device - return __CUDA_ARCH__ / 100; -#endif - } -}; - - -// Multiple cpu cores -// We should really use a thread pool here but first we need to find a portable thread pool library. -#ifdef EIGEN_USE_THREADS - -// This defines an interface that ThreadPoolDevice can take to use -// custom thread pools underneath. -class ThreadPoolInterface { - public: - virtual void Schedule(std::function<void()> fn) = 0; - - virtual ~ThreadPoolInterface() {} -}; - -// The implementation of the ThreadPool type ensures that the Schedule method -// runs the functions it is provided in FIFO order when the scheduling is done -// by a single thread. -class ThreadPool : public ThreadPoolInterface { - public: - // Construct a pool that contains "num_threads" threads. - explicit ThreadPool(int num_threads) { - for (int i = 0; i < num_threads; i++) { - threads_.push_back(new std::thread([this]() { WorkerLoop(); })); - } - } - - // Wait until all scheduled work has finished and then destroy the - // set of threads. - ~ThreadPool() - { - { - // Wait for all work to get done. - std::unique_lock<std::mutex> l(mu_); - empty_.wait(l, [this]() { return pending_.empty(); }); - exiting_ = true; - - // Wakeup all waiters. - for (auto w : waiters_) { - w->ready = true; - w->work = nullptr; - w->cv.notify_one(); - } - } - - // Wait for threads to finish. - for (auto t : threads_) { - t->join(); - delete t; - } - } - - // Schedule fn() for execution in the pool of threads. The functions are - // executed in the order in which they are scheduled. - void Schedule(std::function<void()> fn) { - std::unique_lock<std::mutex> l(mu_); - if (waiters_.empty()) { - pending_.push_back(fn); - } else { - Waiter* w = waiters_.back(); - waiters_.pop_back(); - w->ready = true; - w->work = fn; - w->cv.notify_one(); - } - } - - protected: - void WorkerLoop() { - std::unique_lock<std::mutex> l(mu_); - Waiter w; - while (!exiting_) { - std::function<void()> fn; - if (pending_.empty()) { - // Wait for work to be assigned to me - w.ready = false; - waiters_.push_back(&w); - w.cv.wait(l, [&w]() { return w.ready; }); - fn = w.work; - w.work = nullptr; - } else { - // Pick up pending work - fn = pending_.front(); - pending_.pop_front(); - if (pending_.empty()) { - empty_.notify_all(); - } - } - if (fn) { - mu_.unlock(); - fn(); - mu_.lock(); - } - } - } - - private: - struct Waiter { - std::condition_variable cv; - std::function<void()> work; - bool ready; - }; - - std::mutex mu_; - std::vector<std::thread*> threads_; // All threads - std::vector<Waiter*> waiters_; // Stack of waiting threads. - std::deque<std::function<void()>> pending_; // Queue of pending work - std::condition_variable empty_; // Signaled on pending_.empty() - bool exiting_ = false; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object. -// but only one caller must call Notify() on the object. -class Notification { - public: - Notification() : notified_(false) {} - ~Notification() {} - - void Notify() { - std::unique_lock<std::mutex> l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void WaitForNotification() { - std::unique_lock<std::mutex> l(mu_); - cv_.wait(l, [this]() { return notified_; } ); - } - - private: - std::mutex mu_; - std::condition_variable cv_; - bool notified_; -}; - -// Runs an arbitrary function and then calls Notify() on the passed in -// Notification. -template <typename Function, typename... Args> struct FunctionWrapper -{ - static void run(Notification* n, Function f, Args... args) { - f(args...); - n->Notify(); - } -}; - -static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { - if (n) { - n->WaitForNotification(); - } -} - - -// Build a thread pool device on top the an existing pool of threads. -struct ThreadPoolDevice { - // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } - - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - - EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - - EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_STRONG_INLINE size_t numThreads() const { - return num_threads_; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { - // Should return an enum that encodes the ISA supported by the CPU - return 1; - } - - template <class Function, class... Args> - EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { - Notification* n = new Notification(); - std::function<void()> func = - std::bind(&FunctionWrapper<Function, Args...>::run, n, f, args...); - pool_->Schedule(func); - return n; - } - template <class Function, class... Args> - EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { - std::function<void()> func = std::bind(f, args...); - pool_->Schedule(func); - } - - private: - ThreadPoolInterface* pool_; - size_t num_threads_; -}; - -#endif - - -// GPU offloading -#ifdef EIGEN_USE_GPU - // This defines an interface that GPUDevice can take to use // CUDA streams underneath. class StreamInterface { @@ -515,8 +255,6 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { } #endif -#endif - } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h new file mode 100644 index 000000000..267f6f8e3 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H + + +namespace Eigen { + +// Default device for the machine (typically a single cpu core) +struct DefaultDevice { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return 1; +#else + // Running on a CUDA device + return 32; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#else + // Running on a CUDA device + return __CUDA_ARCH__ / 100; +#endif + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h new file mode 100644 index 000000000..dcbef5b03 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -0,0 +1,224 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H + +namespace Eigen { + +// This defines an interface that ThreadPoolDevice can take to use +// custom thread pools underneath. +class ThreadPoolInterface { + public: + virtual void Schedule(std::function<void()> fn) = 0; + + virtual ~ThreadPoolInterface() {} +}; + +// The implementation of the ThreadPool type ensures that the Schedule method +// runs the functions it is provided in FIFO order when the scheduling is done +// by a single thread. +class ThreadPool : public ThreadPoolInterface { + public: + // Construct a pool that contains "num_threads" threads. + explicit ThreadPool(int num_threads) { + for (int i = 0; i < num_threads; i++) { + threads_.push_back(new std::thread([this]() { WorkerLoop(); })); + } + } + + // Wait until all scheduled work has finished and then destroy the + // set of threads. + ~ThreadPool() + { + { + // Wait for all work to get done. + std::unique_lock<std::mutex> l(mu_); + empty_.wait(l, [this]() { return pending_.empty(); }); + exiting_ = true; + + // Wakeup all waiters. + for (auto w : waiters_) { + w->ready = true; + w->work = nullptr; + w->cv.notify_one(); + } + } + + // Wait for threads to finish. + for (auto t : threads_) { + t->join(); + delete t; + } + } + + // Schedule fn() for execution in the pool of threads. The functions are + // executed in the order in which they are scheduled. + void Schedule(std::function<void()> fn) { + std::unique_lock<std::mutex> l(mu_); + if (waiters_.empty()) { + pending_.push_back(fn); + } else { + Waiter* w = waiters_.back(); + waiters_.pop_back(); + w->ready = true; + w->work = fn; + w->cv.notify_one(); + } + } + + protected: + void WorkerLoop() { + std::unique_lock<std::mutex> l(mu_); + Waiter w; + while (!exiting_) { + std::function<void()> fn; + if (pending_.empty()) { + // Wait for work to be assigned to me + w.ready = false; + waiters_.push_back(&w); + w.cv.wait(l, [&w]() { return w.ready; }); + fn = w.work; + w.work = nullptr; + } else { + // Pick up pending work + fn = pending_.front(); + pending_.pop_front(); + if (pending_.empty()) { + empty_.notify_all(); + } + } + if (fn) { + mu_.unlock(); + fn(); + mu_.lock(); + } + } + } + + private: + struct Waiter { + std::condition_variable cv; + std::function<void()> work; + bool ready; + }; + + std::mutex mu_; + std::vector<std::thread*> threads_; // All threads + std::vector<Waiter*> waiters_; // Stack of waiting threads. + std::deque<std::function<void()>> pending_; // Queue of pending work + std::condition_variable empty_; // Signaled on pending_.empty() + bool exiting_ = false; +}; + + +// Notification is an object that allows a user to to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object. +// but only one caller must call Notify() on the object. +class Notification { + public: + Notification() : notified_(false) {} + ~Notification() {} + + void Notify() { + std::unique_lock<std::mutex> l(mu_); + eigen_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void WaitForNotification() { + std::unique_lock<std::mutex> l(mu_); + cv_.wait(l, [this]() { return notified_; } ); + } + + private: + std::mutex mu_; + std::condition_variable cv_; + bool notified_; +}; + +// Runs an arbitrary function and then calls Notify() on the passed in +// Notification. +template <typename Function, typename... Args> struct FunctionWrapper +{ + static void run(Notification* n, Function f, Args... args) { + f(args...); + n->Notify(); + } +}; + +static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { + if (n) { + n->WaitForNotification(); + } +} + + +// Build a thread pool device on top the an existing pool of threads. +struct ThreadPoolDevice { + // The ownership of the thread pool remains with the caller. + ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_STRONG_INLINE size_t numThreads() const { + return num_threads_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + + template <class Function, class... Args> + EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { + Notification* n = new Notification(); + std::function<void()> func = + std::bind(&FunctionWrapper<Function, Args...>::run, n, f, args...); + pool_->Schedule(func); + return n; + } + template <class Function, class... Args> + EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { + std::function<void()> func = std::bind(f, args...); + pool_->Schedule(func); + } + + private: + ThreadPoolInterface* pool_; + size_t num_threads_; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H |