Diffstat (limited to 'tensorflow/core/kernels/maxpooling_op.cc')
 tensorflow/core/kernels/maxpooling_op.cc | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)
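This diff threads a new propagate_nans_ flag through the GPU max-pooling kernels: each kernel constructor reads it from the TF_ENABLE_MAXPOOL_NANPROP environment variable and forwards it to the cuDNN wrappers (DnnPoolingOp, DnnPoolingGradOp) and the custom CUDA launchers. The helper it relies on, ReadBoolFromEnvVar from tensorflow/core/util/env_var.h (newly included below), returns a Status that the call sites in this diff discard; a minimal sketch of its contract, with an illustrative variable name:

    #include "tensorflow/core/util/env_var.h"

    bool propagate_nans = false;
    // Parses TF_ENABLE_MAXPOOL_NANPROP; the default is kept when the
    // variable is unset, and a malformed value is reported via the Status.
    tensorflow::Status s = tensorflow::ReadBoolFromEnvVar(
        "TF_ENABLE_MAXPOOL_NANPROP", /*default_val=*/false, &propagate_nans);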
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index e2cf605811..157ce106ce 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,7 +20,6 @@ limitations under the License.
#include "tensorflow/core/kernels/maxpooling_op.h"
#include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
@@ -34,9 +33,11 @@ limitations under the License.
#include "tensorflow/core/kernels/pooling_ops_common.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/util/env_var.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
#include "tensorflow/core/util/use_cudnn.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#if GOOGLE_CUDA
#include "tensorflow/core/kernels/maxpooling_op_gpu.h"
@@ -358,6 +359,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
use_dnn_ = CanUseCudnn();
+ ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
}
void Compute(OpKernelContext* context) override {
@@ -405,7 +407,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
DnnPoolingGradOp<T>::Compute(
context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop,
- output_shape);
+ output_shape, propagate_nans_);
} else {
CHECK(data_format_ == FORMAT_NHWC)
<< "Non-Cudnn MaxPoolGrad only supports NHWC format";
@@ -420,6 +422,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
Padding padding_;
TensorFormat data_format_;
bool use_dnn_;
+ bool propagate_nans_;
};
#endif // GOOGLE_CUDA
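For context, the semantics being toggled: with IEEE comparisons, a > b is false whenever a is NaN, so the default max-pool silently drops NaN elements from the window, while propagation makes any NaN in the window poison the output. An illustrative pair of comparison predicates (not code from this diff; the actual device-side logic lives in maxpooling_op_gpu.cu.cc):

    // Default: a NaN candidate never compares greater, so it is skipped.
    template <typename T>
    bool TakesOverMax(T candidate, T current_max) {
      return candidate > current_max;
    }

    // Propagating: a NaN candidate always replaces the running maximum.
    template <typename T>
    bool TakesOverMaxNanProp(T candidate, T current_max) {
      return candidate > current_max || Eigen::numext::isnan(candidate);
    }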
@@ -884,6 +887,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
errors::Unimplemented(
"Pooling is not yet supported on the batch dimension."));
+
+ ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
}
void Compute(OpKernelContext* context) override {
@@ -902,14 +907,15 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
Tensor* argmax = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(1, out_shape, &argmax));
- LaunchMaxPoolingWithArgmax<Device, T>::launch(context, params, tensor_in,
- output, argmax);
+ LaunchMaxPoolingWithArgmax<Device, T>::launch(
+ context, params, tensor_in, output, argmax, propagate_nans_);
}
private:
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
+ bool propagate_nans_;
};
template <typename Device, typename T>
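The same ReadBoolFromEnvVar call now appears in four kernel constructors. A hypothetical de-duplication helper (not part of this commit) would also surface the otherwise-discarded Status:

    // Hypothetical helper; this commit repeats the call inline instead.
    static bool MaxPoolNanPropEnabled() {
      bool enabled = false;
      TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP",
                                                 false, &enabled));
      return enabled;
    }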
@@ -1045,6 +1051,8 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
errors::Unimplemented(
"Pooling is not yet supported on the batch dimension."));
use_dnn_ = CanUseCudnn();
+
+ ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
}
void Compute(OpKernelContext* context) override {
@@ -1068,9 +1076,10 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
// These is_int8x4 checks avoid linker errors for missing qint8 kernels.
if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) {
- DnnPoolingOp<T>::Compute(
- context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_,
- stride_, padding_, data_format_, tensor_in, out_shape);
+ DnnPoolingOp<T>::Compute(context,
+ perftools::gputools::dnn::PoolingMode::kMaximum,
+ ksize_, stride_, padding_, data_format_,
+ tensor_in, out_shape, propagate_nans_);
} else {
Tensor* output = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
@@ -1079,7 +1088,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
tensor_in, output);
} else if (data_format_ == FORMAT_NHWC) {
LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
- output);
+ output, propagate_nans_);
} else {
LOG(FATAL) << "MaxPool currently only supports the following (layout, "
"type) combinations: (NHWC, non-qint8), "
@@ -1098,6 +1107,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
Padding padding_;
TensorFormat data_format_;
bool use_dnn_;
+ bool propagate_nans_;
};
template <typename T>
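The DnnPoolingOp<T>::Compute call sites now pass propagate_nans_ as a trailing argument, which implies the declaration in pooling_ops_common.h gained a matching parameter. A sketch reconstructed from the call sites (see that header for the authoritative signature):

    template <typename T>
    class DnnPoolingOp {
     public:
      // Trailing propagate_nans inferred from the call sites in this diff.
      static void Compute(OpKernelContext* context,
                          perftools::gputools::dnn::PoolingMode pooling_mode,
                          const std::vector<int32>& size,
                          const std::vector<int32>& stride, Padding padding,
                          TensorFormat data_format, const Tensor& tensor_in,
                          const TensorShape& tensor_out_shape,
                          bool propagate_nans);
    };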
@@ -1127,6 +1137,7 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
}
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
use_dnn_ = CanUseCudnn();
+ ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
}
void Compute(OpKernelContext* context) override {
@@ -1168,16 +1179,17 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height,
params.out_width, params.depth);
if (use_dnn_ && data_format_ == FORMAT_NCHW) {
- DnnPoolingOp<T>::Compute(
- context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize,
- stride, padding_, data_format_, tensor_in, out_shape);
+ DnnPoolingOp<T>::Compute(context,
+ perftools::gputools::dnn::PoolingMode::kMaximum,
+ ksize, stride, padding_, data_format_, tensor_in,
+ out_shape, propagate_nans_);
} else {
CHECK(data_format_ == FORMAT_NHWC)
<< "Non-Cudnn MaxPool only supports NHWC format";
Tensor* output = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
- output);
+ output, propagate_nans_);
}
}
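On the cuDNN path the boolean maps naturally onto cudnnNanPropagation_t, which cuDNN accepts when the pooling descriptor is configured; TensorFlow reaches it through the stream-executor layer rather than calling cuDNN directly. An illustrative descriptor setup (desc and the window/pad/stride names are placeholders, not code from this diff):

    cudnnSetPooling2dDescriptor(desc, CUDNN_POOLING_MAX,
                                propagate_nans ? CUDNN_PROPAGATE_NAN
                                               : CUDNN_NOT_PROPAGATE_NAN,
                                window_h, window_w, pad_h, pad_w,
                                stride_h, stride_w);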
@@ -1187,18 +1199,20 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
Padding padding_;
TensorFormat data_format_;
bool use_dnn_;
+ bool propagate_nans_;
};
template <typename T>
struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
static void launch(OpKernelContext* context, const PoolParameters& params,
- const Tensor& input, Tensor* output) {
+ const Tensor& input, Tensor* output, bool propagate_nans) {
bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
params.tensor_in_cols, params.depth, params.out_height,
params.out_width, params.window_rows, params.window_cols,
params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
- output->flat<T>().data(), nullptr, context->eigen_gpu_device());
+ output->flat<T>().data(), nullptr, context->eigen_gpu_device(),
+ propagate_nans);
if (!status) {
context->SetStatus(
errors::Internal("Failed launching MaxPoolForwardNoMask"));
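Both launchers invoke functor::MaxPoolForwardWithOptionalArgmax with propagate_nans appended after the device argument, so its operator() in maxpooling_op_gpu.h presumably gained the same trailing parameter. A sketch reconstructed from these call sites (parameter names are illustrative):

    template <typename T>
    struct MaxPoolForwardWithOptionalArgmax {
      // Returns false if the kernel launch fails; mask may be nullptr when
      // no argmax output is requested.
      bool operator()(const T* bottom_data, int batch, int height, int width,
                      int channels, int pooled_height, int pooled_width,
                      int kernel_h, int kernel_w, int stride_h, int stride_w,
                      int pad_t, int pad_l, T* top_data, int64* mask,
                      const Eigen::GpuDevice& d, bool propagate_nans);
    };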
@@ -1209,7 +1223,8 @@ struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
template <typename T>
struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
static void launch(OpKernelContext* context, const PoolParameters& params,
- const Tensor& input, Tensor* output, Tensor* argmax) {
+ const Tensor& input, Tensor* output, Tensor* argmax,
+ bool propagate_nans) {
bool status = functor::MaxPoolForwardWithOptionalArgmax<T>()(
input.flat<T>().data(), params.tensor_in_batch, params.tensor_in_rows,
params.tensor_in_cols, params.depth, params.out_height,
@@ -1217,7 +1232,7 @@ struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
params.row_stride, params.col_stride, params.pad_rows, params.pad_cols,
output->flat<T>().data(),
reinterpret_cast<int64*>(argmax->flat<int64>().data()),
- context->eigen_gpu_device());
+ context->eigen_gpu_device(), propagate_nans);
if (!status) {
context->SetStatus(
errors::Internal("Failed launching MaxPoolForwardWithArgmax"));
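Since the flag is read once in each OpKernel constructor, it must be set before the graph's kernels are instantiated; changing the environment afterwards has no effect. A minimal way to opt in from a C++ driver (illustrative; most users would simply export TF_ENABLE_MAXPOOL_NANPROP=1 in the shell):

    #include <cstdlib>

    int main() {
      // Must run before TensorFlow constructs the pooling kernels.
      setenv("TF_ENABLE_MAXPOOL_NANPROP", "1", /*overwrite=*/1);
      // ... build and run the TensorFlow graph here ...
      return 0;
    }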