about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-01-11 14:25:43 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-01-11 14:25:43 -0800
commit b523771a24320014abfec537b0f4b568c19882eb (patch)
tree d24c992f9f34d7ab25761f30366d8a5cb74d8d65 /unsupported
parent 2c3b13eded68aa0cef0185a7db9483bfc27d749b (diff)
Silenced several compilation warnings triggered by nvcc.
Diffstat (limited to 'unsupported')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h 48
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h 8
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h 12
3 files changed, 46 insertions, 22 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index c74613873..0f67f0f57 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -10,7 +10,6 @@
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
-
namespace Eigen {
// This defines an interface that GPUDevice can take to use
@@ -206,20 +205,45 @@ struct GpuDevice {
#endif
}
- inline int getNumCudaMultiProcessors() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
+#ifndef __CUDA_ARCH__
return stream_->deviceProperties().multiProcessorCount;
+#else
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
+ return 0;
+#endif
}
- inline int maxCudaThreadsPerBlock() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
+#ifndef __CUDA_ARCH__
return stream_->deviceProperties().maxThreadsPerBlock;
+#else
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
+ return 0;
+#endif
}
- inline int maxCudaThreadsPerMultiProcessor() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
+#ifndef __CUDA_ARCH__
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
+#else
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
+ return 0;
+#endif
}
- inline int sharedMemPerBlock() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
+#ifndef __CUDA_ARCH__
return stream_->deviceProperties().sharedMemPerBlock;
+#else
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
+ return 0;
+#endif
}
- inline int majorDeviceVersion() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
+#ifndef __CUDA_ARCH__
return stream_->deviceProperties().major;
+#else
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
+ return 0;
+#endif
}
// This function checks if the CUDA runtime recorded an error for the
@@ -239,13 +263,13 @@ struct GpuDevice {
};
#ifndef __CUDA_ARCH__
-#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
- (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
+#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
+ (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
assert(cudaGetLastError() == cudaSuccess);
#else
-#define LAUNCH_CUDA_KERNEL(kernel, ...) \
- { static const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); } \
- eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__);
+#define LAUNCH_CUDA_KERNEL(kernel, ...) \
+ { const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); } \
+ eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__);
#endif
@@ -260,4 +284,4 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
} // end namespace Eigen
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index d93e1de1b..d2ab70f2b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -156,14 +156,14 @@ template <typename Expression>
class TensorExecutor<Expression, GpuDevice, false> {
public:
typedef typename Expression::Index Index;
- static void run(const Expression& expr, const GpuDevice& device);
+ static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
};
template <typename Expression>
class TensorExecutor<Expression, GpuDevice, true> {
public:
typedef typename Expression::Index Index;
- static void run(const Expression& expr, const GpuDevice& device);
+ static EIGEN_DEVICE_FUNC void run(const Expression& expr, const GpuDevice& device);
};
#if defined(__CUDACC__)
@@ -213,7 +213,7 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
/*static*/
template <typename Expression>
-inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
+EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
{
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -232,7 +232,7 @@ inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression&
/*static*/
template<typename Expression>
-inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
+EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
{
TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 3fa3d5c3c..867654aff 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -115,8 +115,8 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
internal::is_same<typename Self::CoeffReturnType, float>::value;
template <typename OutputType>
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
- eigen_assert(false && "Should only be called on floats");
+ static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
+ assert(false && "Should only be called on floats");
}
static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
@@ -210,11 +210,11 @@ struct InnerReducer<Self, Op, GpuDevice> {
internal::is_same<typename Self::CoeffReturnType, float>::value;
template <typename Device, typename OutputType>
- static void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
+ static EIGEN_DEVICE_FUNC void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
assert(false && "Should only be called to reduce floats on a gpu device");
}
- static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
+ static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
typedef typename Self::Index Index;
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
@@ -264,11 +264,11 @@ struct OuterReducer<Self, Op, GpuDevice> {
internal::is_same<typename Self::CoeffReturnType, float>::value;
template <typename Device, typename OutputType>
- static void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
+ static EIGEN_DEVICE_FUNC void run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
assert(false && "Should only be called to reduce floats on a gpu device");
}
- static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
+ static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
typedef typename Self::Index Index;
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;