aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2015-01-14 11:45:17 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2015-01-14 11:45:17 -0800
commit0a0ab6dd158e3f4471ba1fe20454de35b18fdce5 (patch)
treed55cb316a77708a8694d2b546a51685b47752317 /unsupported
parent5692723c588c219bca9523962a4620fe7cc4c4c9 (diff)
Increased the functionality of the tensor devices
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h22
1 files changed, 19 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
index bb05e4177..efd207507 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
@@ -43,11 +43,14 @@ typedef std::promise<void> Promise;
static EIGEN_STRONG_INLINE void wait_until_ready(const Future* f) {
f->wait();
- // eigen_assert(f->ready());
}
+static EIGEN_STRONG_INLINE void get_when_ready(Future* f) {
+ f->get();
+}
+
struct ThreadPoolDevice {
- ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : num_threads_(num_cores) { }
+ ThreadPoolDevice(size_t num_cores) : num_threads_(num_cores) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
return internal::aligned_malloc(num_bytes);
@@ -79,9 +82,9 @@ struct ThreadPoolDevice {
}
private:
- // todo: NUMA, ...
size_t num_threads_;
};
+
#endif
@@ -114,6 +117,10 @@ static inline int sharedMemPerBlock() {
return m_deviceProperties.sharedMemPerBlock;
}
+static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
+ cudaError_t status = cudaDeviceSetSharedMemConfig(config);
+ assert(status == cudaSuccess);
+}
struct GpuDevice {
// The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.
@@ -163,10 +170,19 @@ struct GpuDevice {
return 32;
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
+ cudaStreamSynchronize(*stream_);
+ }
+
private:
// TODO: multigpu.
const cudaStream_t* stream_;
};
+
+#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
+ (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
+ assert(cudaGetLastError() == cudaSuccess);
+
#endif
} // end namespace Eigen