From 403a7cb6c34d163e4f120387b5dc5487d30bb1d5 Mon Sep 17 00:00:00 2001
From: Jeremy Barnes
Date: Sun, 10 Jan 2016 22:39:13 -0500
Subject: Alternative way of forcing instantiation of device kernels without causing warnings or requiring device to device kernel invocations.

This allows Tensorflow to work on SM 3.0 (i.e., Amazon EC2) machines.
---
 unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 558d0c83d..374edb605 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -116,7 +116,7 @@ struct FullReducer {
 
   template <typename OutputType>
   static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
-    assert(false && "Should only be called on floats");
+    eigen_assert(false && "Should only be called on floats");
   }
 
   static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
@@ -126,7 +126,7 @@ struct FullReducer {
     const int block_size = 256;
     const int num_per_thread = 128;
     const int num_blocks = std::ceil(static_cast(num_coeffs) / (block_size * num_per_thread));
-    LAUNCH_CUDA_KERNEL((FullReductionKernel),
+    LAUNCH_CUDA_KERNEL((FullReductionKernel),
                        num_blocks, block_size, 0, device, reducer, self, num_coeffs, output);
   }
 };
-- 
cgit v1.2.3