diff options
author | Jeremy Barnes <jeremy@barneso.com> | 2016-01-10 22:39:13 -0500 |
---|---|---|
committer | Jeremy Barnes <jeremy@barneso.com> | 2016-01-10 22:39:13 -0500 |
commit | 403a7cb6c34d163e4f120387b5dc5487d30bb1d5 (patch) | |
tree | ce6b06d27b3f71cfa8bdc8904cf9f2280217e886 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | |
parent | b557662e589a76265f73b99d7ca54a988d7bdb59 (diff) |
Alternative way of forcing instantiation of device kernels without
causing warnings or requiring device-to-device kernel invocations.
This allows TensorFlow to work on SM 3.0 (i.e., Amazon EC2) machines.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 558d0c83d..374edb605 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -116,7 +116,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> { template <typename OutputType> static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { - assert(false && "Should only be called on floats"); + eigen_assert(false && "Should only be called on floats"); } static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { @@ -126,7 +126,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> { const int block_size = 256; const int num_per_thread = 128; const int num_blocks = std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread)); - LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>), + LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>), num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); } }; |