diff options
Diffstat (limited to 'tensorflow/core/kernels/maxpooling_op.cc')
-rw-r--r-- | tensorflow/core/kernels/maxpooling_op.cc | 47 |
1 files changed, 31 insertions, 16 deletions
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index e2cf605811..157ce106ce 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include <vector> -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -34,9 +33,11 @@ limitations under the License. #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -358,6 +359,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -405,7 +407,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel { DnnPoolingGradOp<T>::Compute( context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop, - output_shape); + output_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPoolGrad only supports NHWC format"; @@ -420,6 +422,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; #endif // GOOGLE_CUDA @@ -884,6 +887,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); + + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -902,14 +907,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel { Tensor* argmax = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax)); - LaunchMaxPoolingWithArgmax<Device, T>::launch(context, params, tensor_in, - output, argmax); + LaunchMaxPoolingWithArgmax<Device, T>::launch( + context, params, tensor_in, output, argmax, propagate_nans_); } private: std::vector<int32> ksize_; std::vector<int32> stride_; Padding padding_; + bool propagate_nans_; }; template <typename Device, typename T> @@ -1045,6 +1051,8 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); + + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1068,9 +1076,10 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel { // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp<T>::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_, - stride_, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp<T>::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize_, stride_, padding_, data_format_, + tensor_in, out_shape, propagate_nans_); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); @@ -1079,7 +1088,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel { tensor_in, output); } else if (data_format_ == FORMAT_NHWC) { LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in, - output); + output, propagate_nans_); } else { LOG(FATAL) << "MaxPool currently only supports the following (layout, " "type) combinations: (NHWC, non-qint8), " @@ -1098,6 +1107,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template <typename T> @@ -1127,6 +1137,7 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); + ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); } void Compute(OpKernelContext* context) override { @@ -1168,16 +1179,17 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); if (use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp<T>::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, tensor_in, out_shape); + DnnPoolingOp<T>::Compute(context, + perftools::gputools::dnn::PoolingMode::kMaximum, + ksize, stride, padding_, data_format_, tensor_in, + out_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPool only supports NHWC format"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in, - output); + output, propagate_nans_); } } @@ -1187,18 +1199,20 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel { Padding padding_; TensorFormat data_format_; bool use_dnn_; + bool propagate_nans_; }; template <typename T> struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output) { + const Tensor& input, Tensor* output, bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()( input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, - output->flat<T>().data(), nullptr, context->eigen_gpu_device()); + output->flat<T>().data(), nullptr, context->eigen_gpu_device(), + propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardNoMask")); @@ -1209,7 +1223,8 @@ struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> { template <typename T> struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> { static void launch(OpKernelContext* context, const PoolParameters& params, - const Tensor& input, Tensor* output, Tensor* argmax) { + const Tensor& input, Tensor* output, Tensor* argmax, + bool propagate_nans) { bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()( input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, @@ -1217,7 +1232,7 @@ struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> { params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, output->flat<T>().data(), reinterpret_cast<int64*>(argmax->flat<int64>().data()), - context->eigen_gpu_device()); + context->eigen_gpu_device(), propagate_nans); if (!status) { context->SetStatus( errors::Internal("Failed launching MaxPoolForwardWithArgmax")); |