Merged in rmlarsen/eigen (pull request PR-643)

Make Eigen build with CUDA 10 and clang.

Approved-by: Justin Lebar <justin.lebar@gmail.com>
author: Rasmus Larsen <rmlarsen@google.com> 2019-05-20 17:02:39 +0000
committer: Rasmus Larsen <rmlarsen@google.com> 2019-05-20 17:02:39 +0000
commit: e92486b8c34272f6eae563665fc89b7f0abb902f (patch)
tree: 3e5554b3ec06ff8dcee8519ff6c5e824854deb89 /unsupported/Eigen/CXX11
parent: cc7ecbb1241cdc16b0b0f27631c556d8eda5aa4b (diff)
parent: fd595d42a76437cc9df0fcfbc49c5372e416c8d6 (diff)
1 files changed, 0 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
index 0718ba2a1..7ee4a6087 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -674,10 +674,6 @@ struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
     if (num_blocks > 1) {
       // We initialize the outputs outside the reduction kernel when we can't be sure that there
       // won't be a race conditions between multiple thread blocks.
-      const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
-      const int max_blocks = device.getNumGpuMultiProcessors() *
-                           device.maxGpuThreadsPerMultiProcessor() / 1024;
-      const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
       LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
                          1, 1, 0, device, reducer, self, num_preserved_vals, output);
     }
author	Rasmus Larsen <rmlarsen@google.com>	2019-05-20 17:02:39 +0000
committer	Rasmus Larsen <rmlarsen@google.com>	2019-05-20 17:02:39 +0000
commit	e92486b8c34272f6eae563665fc89b7f0abb902f (patch)
tree	3e5554b3ec06ff8dcee8519ff6c5e824854deb89 /unsupported/Eigen/CXX11
parent	cc7ecbb1241cdc16b0b0f27631c556d8eda5aa4b (diff)
parent	fd595d42a76437cc9df0fcfbc49c5372e416c8d6 (diff)