about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2019-05-15 13:32:15 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2019-05-15 13:32:15 -0700
commit: ab0a30e4292f109326b444dabb1e9e0c1dc29881 (patch)
tree: 352aad7d121a118aeba018ca146ade1e84860eb5 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
parent: c8d8d5c0fcfe31eb43005245e36627e104ad2e5f (diff)
Make Eigen build with cuda 10 and clang.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h | 4
1 files changed, 0 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
index 0718ba2a1..7ee4a6087 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -674,10 +674,6 @@ struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
if (num_blocks > 1) {
// We initialize the outputs outside the reduction kernel when we can't be sure that there
// won't be a race conditions between multiple thread blocks.
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
- const int max_blocks = device.getNumGpuMultiProcessors() *
- device.maxGpuThreadsPerMultiProcessor() / 1024;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
1, 1, 0, device, reducer, self, num_preserved_vals, output);
}