aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-07-08 16:30:48 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-07-08 16:30:48 -0700
commitc285fda7f40ca161e6c8e66481d9a68e50613c48 (patch)
tree46d8f2bebd2e5264bda969f00a4483a2a8d076f3 /unsupported/Eigen/CXX11/src/Tensor
parent7d53633e05986c61ce90e7fc36862d529c0cc036 (diff)
Extended the functionality of the TensorDeviceType classes
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h59
1 file changed, 56 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
index 142edda14..b9c8c19fe 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
@@ -21,6 +21,12 @@ struct DefaultDevice {
EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
internal::aligned_free(buffer);
}
+ EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
+ ::memset(buffer, c, n);
+ }
};
@@ -28,7 +34,7 @@ struct DefaultDevice {
// We should really use a thread pool here but first we need to find a portable thread pool library.
#ifdef EIGEN_USE_THREADS
struct ThreadPoolDevice {
- ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { }
+ ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { }
size_t numThreads() const { return num_threads_; }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
@@ -37,6 +43,12 @@ struct ThreadPoolDevice {
EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
internal::aligned_free(buffer);
}
+ EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
+ ::memset(buffer, c, n);
+ }
private:
// todo: NUMA, ...
@@ -47,20 +59,61 @@ struct ThreadPoolDevice {
// GPU offloading
#ifdef EIGEN_USE_GPU
+static int m_numMultiProcessors = 0;
+static int m_maxThreadsPerBlock = 0;
+static int m_maxThreadsPerMultiProcessor = 0;
+
+// Returns the multiprocessor count of CUDA device 0, querying the device
+// properties lazily on first call and caching all three cached values at once.
+// NOTE(review): the lazy init of these file-scope statics is not synchronized;
+// concurrent first calls from multiple host threads could race — confirm callers.
+static inline int getNumCudaMultiProcessors() {
+  if (m_numMultiProcessors == 0) {
+    cudaDeviceProp deviceProp;
+    cudaGetDeviceProperties(&deviceProp, 0);
+    m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
+    m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
+    m_numMultiProcessors = deviceProp.multiProcessorCount;
+  }
+  return m_numMultiProcessors;
+}
+// Returns maxThreadsPerBlock of CUDA device 0, lazily querying device
+// properties on first call and populating all three cached values together.
+static inline int maxCudaThreadsPerBlock() {
+  if (m_maxThreadsPerBlock == 0) {
+    cudaDeviceProp deviceProp;
+    cudaGetDeviceProperties(&deviceProp, 0);
+    m_numMultiProcessors = deviceProp.multiProcessorCount;
+    m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
+    m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
+  }
+  return m_maxThreadsPerBlock;
+}
+// Returns maxThreadsPerMultiProcessor of CUDA device 0, lazily querying
+// device properties on first call and populating all three cached values.
+// Fix: guard on this function's own cached value, not m_maxThreadsPerBlock
+// (copy-paste from maxCudaThreadsPerBlock above) — the old sentinel would
+// skip initialization and return 0 if the block cache were ever populated
+// without this one.
+static inline int maxCudaThreadsPerMultiProcessor() {
+  if (m_maxThreadsPerMultiProcessor == 0) {
+    cudaDeviceProp deviceProp;
+    cudaGetDeviceProperties(&deviceProp, 0);
+    m_numMultiProcessors = deviceProp.multiProcessorCount;
+    m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
+    m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
+  }
+  return m_maxThreadsPerMultiProcessor;
+}
+
struct GpuDevice {
// The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.
GpuDevice(const cudaStream_t* stream) : stream_(stream) { eigen_assert(stream); }
EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return *stream_; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
+ /*EIGEN_DEVICE_FUNC*/ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
void* result;
cudaMalloc(&result, num_bytes);
return result;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
+ /*EIGEN_DEVICE_FUNC */EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
cudaFree(buffer);
}
+  // Asynchronous device-to-device copy enqueued on this device's stream.
+  // NOTE(review): the kind is hard-coded to cudaMemcpyDeviceToDevice, so both
+  // pointers must be device memory; returns before the copy completes.
+  EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
+    cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, *stream_);
+  }
+  // Asynchronous byte-wise fill of device memory, enqueued on this device's
+  // stream; returns before the fill completes.
+  EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
+    cudaMemsetAsync(buffer, c, n, *stream_);
+  }
private:
// TODO: multigpu.