aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-01-11 17:49:44 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-01-11 17:49:44 -0800
commitbd7d901da9bd824ee2a7a94d3b0c8668d77f5ff2 (patch)
tree1eaa9358b196ac2f638027fe78ce383ddd343231 /unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
parentbbdabbb37976a37ab585d266691100bdd035f30b (diff)
Reverted a previous change that tripped nvcc when compiling in debug mode.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h6
1 files changed, 3 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
index 126a9e4ad..a5f3debc4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
@@ -1274,7 +1274,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
}
}
- EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
+ void evalTo(Scalar* buffer) const {
if (this->m_lhs_inner_dim_contiguous) {
if (this->m_rhs_inner_dim_contiguous) {
if (this->m_rhs_inner_dim_reordered) {
@@ -1313,7 +1313,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
}
}
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> EIGEN_DEVICE_FUNC
+ template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
void evalTyped(Scalar* buffer) const {
// columns in left side, rows in right side
const Index k = this->m_k_size;
@@ -1362,7 +1362,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
const dim3 block_size(16, 16, 1);
LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k);
} else {
- const Index m_blocks = (m + 127) / 128;
+ const Index m_blocks = (m + 127) / 128;
const Index n_blocks = (n + 63) / 64;
const dim3 num_blocks(m_blocks, n_blocks, 1);
const dim3 block_size(8, 32, 1);