diff options
Diffstat (limited to 'unsupported/Eigen')
3 files changed, 11 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index 90ee50678..126a9e4ad 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -1261,7 +1261,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT Base(op, device) {} // We need to redefine this method to make nvcc happy - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { this->m_leftImpl.evalSubExprsIfNeeded(NULL); this->m_rightImpl.evalSubExprsIfNeeded(NULL); if (data) { @@ -1274,7 +1274,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT } } - void evalTo(Scalar* buffer) const { + EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { if (this->m_lhs_inner_dim_contiguous) { if (this->m_rhs_inner_dim_contiguous) { if (this->m_rhs_inner_dim_reordered) { @@ -1313,10 +1313,11 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT } } - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> + template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> EIGEN_DEVICE_FUNC void evalTyped(Scalar* buffer) const { // columns in left side, rows in right side const Index k = this->m_k_size; + EIGEN_UNUSED_VARIABLE(k) // rows in left side const Index m = this->m_i_size; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 0f67f0f57..5abdc489b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -275,10 +275,14 @@ struct GpuDevice { // FIXME: Should be device and kernel specific. #ifdef __CUDACC__ -static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { +static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { +#ifndef __CUDA_ARCH__ cudaError_t status = cudaDeviceSetSharedMemConfig(config); EIGEN_UNUSED_VARIABLE(status) assert(status == cudaSuccess); +#else + EIGEN_UNUSED_VARIABLE(config) +#endif } #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 59404dba5..89f055134 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -220,6 +220,8 @@ struct InnerReducer<Self, Op, GpuDevice> { const int block_size = 256; const int num_per_thread = 128; const int num_blocks = 32; + EIGEN_UNUSED_VARIABLE(block_size) + EIGEN_UNUSED_VARIABLE(num_blocks) LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>), num_blocks, block_size, block_size*sizeof(float), device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); |