| author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-07-17 18:08:14 -0700 |
|---|---|---|
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-07-17 18:11:21 -0700 |
| commit | 8238266c4fd433107f38eb126a5c5da05a4d338b (patch) | |
| tree | fc4f923e52e8df2aedde5bc180766d501b9b61bf /tensorflow/contrib/fused_conv | |
| parent | 07cc6474b219ee3ad9f55860e621f61b34bb6bd1 (diff) | |
Support identity activation function in Cudnn implementation of fused conv2d bias activation.
PiperOrigin-RevId: 205008958
Diffstat (limited to 'tensorflow/contrib/fused_conv')
4 files changed, 44 insertions, 21 deletions
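
For context, a minimal usage sketch of the op with the newly allowed mode follows. This is illustrative only and not part of the change: the tensor shapes are made up, it assumes a CUDA build of TF 1.x, and apart from `activation_mode` the keyword names are assumptions inferred from the Python wrapper touched below.

```python
# Hypothetical example: calling the fused op with the newly allowed
# activation_mode="None". Requires a GPU build of TensorFlow 1.x; shapes and
# argument names other than activation_mode are assumptions, not a verified
# signature.
import tensorflow as tf
from tensorflow.contrib.fused_conv.python.ops import \
    fused_conv2d_bias_activation_op as fused_conv

conv_input = tf.random_normal([1, 8, 8, 4])   # NHWC float input
kernel = tf.random_normal([3, 3, 4, 16])      # HWIO filter
bias = tf.zeros([16])

# With activation_mode="None", the op returns conv(input, kernel) + bias
# without applying ReLU; previously only "Relu" was accepted.
out = fused_conv.fused_conv2d_bias_activation(
    conv_input, kernel, bias,
    strides=[1, 1, 1, 1], padding="SAME",
    activation_mode="None")
```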
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index 2458f7554a..4554a3d89a 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -135,9 +135,12 @@ class FusedConv2DBiasActivationOp : public OpKernel {
                    context->GetAttr("activation_mode", &activation_mode_str));
     OP_REQUIRES_OK(context, GetActivationModeFromString(activation_mode_str,
                                                         &activation_mode_));
-    OP_REQUIRES(context, activation_mode_ == ActivationMode::RELU,
-                errors::InvalidArgument("Current implementation only supports "
-                                        "RELU as the activation function."));
+    OP_REQUIRES(context,
+                activation_mode_ == ActivationMode::RELU ||
+                    activation_mode_ == ActivationMode::NONE,
+                errors::InvalidArgument(
+                    "Current implementation only supports RELU or NONE "
+                    "as the activation function."));
     cudnn_use_autotune_ = CudnnUseAutotune();
   }
 
@@ -538,6 +541,18 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
       activation_mode,
   };
 
+  dnn::ActivationMode dnn_activation_mode;
+  switch (activation_mode) {
+    case ActivationMode::NONE:
+      dnn_activation_mode = dnn::ActivationMode::kNone;
+      break;
+    case ActivationMode::RELU:
+      dnn_activation_mode = dnn::ActivationMode::kRelu;
+      break;
+    default:
+      LOG(FATAL) << "Activation mode " << activation_mode << " not supported";
+  }
+
   dnn::AlgorithmConfig algorithm_config;
   if (cudnn_use_autotune && !AutoTuneConvBiasActivation::GetInstance()->Find(
                                 fused_conv_parameters, &algorithm_config)) {
@@ -558,10 +573,9 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
               ->ThenFusedConvolveWithAlgorithm(
                   conv_input_desc, conv_input_ptr, conv_input_scale,
                   filter_desc, filter_ptr, conv_desc, side_input_ptr,
-                  side_input_scale, bias_desc, bias_ptr,
-                  dnn::ActivationMode::kRelu, output_desc, &output_ptr,
-                  &scratch_allocator, dnn::AlgorithmConfig(profile_algorithm),
-                  &profile_result)
+                  side_input_scale, bias_desc, bias_ptr, dnn_activation_mode,
+                  output_desc, &output_ptr, &scratch_allocator,
+                  dnn::AlgorithmConfig(profile_algorithm), &profile_result)
               .ok();
       if (cudnn_launch_status) {
         if (profile_result.is_valid()) {
@@ -597,7 +611,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
           ->ThenFusedConvolveWithAlgorithm(
               conv_input_desc, conv_input_ptr, conv_input_scale, filter_desc,
               filter_ptr, conv_desc, side_input_ptr, side_input_scale,
-              bias_desc, bias_ptr, dnn::ActivationMode::kRelu, output_desc,
+              bias_desc, bias_ptr, dnn_activation_mode, output_desc,
              &output_ptr, &scratch_allocator, algorithm_config,
              /*output_profile_result=*/nullptr)
           .ok();
diff --git a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
index bafd1d5941..410571f378 100644
--- a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
@@ -44,7 +44,7 @@ REGISTER_OP("FusedConv2DBiasActivation")
     .Attr(GetPaddingAttrString())
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Attr("filter_format: {'HWIO', 'OIHW', 'OIHW_VECT_I'} = 'HWIO'")
-    .Attr("activation_mode: {'Relu'} = 'Relu'")
+    .Attr("activation_mode: {'Relu', 'None'} = 'Relu'")
     .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       using shape_inference::ShapeHandle;
@@ -144,7 +144,7 @@ REGISTER_OP("FusedConv2DBiasActivation")
       `qint8 [ output_channels, input_channels / 4,
                kernel_height, kernel_width, input_channels % 4 ]`
     activation_mode: The activation applied to the output.
-      Currently must be "Relu".
+      Must be "Relu" or "None".
     dilations: 1-D tensor of length 4. The dilation factor for each dimension
       of `input`. If set to k > 1, there will be k-1 skipped cells between
       each filter element on that dimension. The dimension order is determined
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
index 983b6dc8e5..cdc07b935d 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
@@ -66,8 +66,10 @@ def fused_conv2d_bias_activation(conv_input,
       This is optional and defaults to 0.
     side_input: A `Tensor` of the format specified by `data_format`.
       This is useful for implementing ResNet blocks.
-    activation_mode: (optional) currently must be the default "Relu".
-      Note that in qint8 mode, it also clips to 127, so acts like ReluX.
+    activation_mode: (optional) currently supports the default "Relu", or
+      "None" activation function.
+      Note: in qint8 mode, "None" actually clips to the range [-128, 127],
+      while "Relu" clips to the range [0, 127].
     data_format: Specifies the data format.
       Possible values are:
       "NHWC" float [batch, height, width, channels]
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 4d62ac65ff..0185ef662c 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -622,7 +622,7 @@ def HwioToOihw(in_tensor):
 
 def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                           padding, strides, side_input_scale,
-                                          side_input, biases):
+                                          side_input, biases, apply_relu):
   """Simulates the int8 fused 2-D convolution op using separate float ops.
 
   The arguments and return values have the same format, meanings and
@@ -636,6 +636,9 @@ def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
     side_input_scale: A scalar 'float'.
     side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
     biases: A `Tensor` of type `float32` in NCHW layout.
+    apply_relu: A boolean to specify whether to apply "Relu" activation function
+      that clips outputs to the range [0, 127], or "None" activation that clips
+      to the range [-128, 127].
 
   Returns:
     A `Tensor` of type `qint8` in NCHW_VECT_C layout.
   """
@@ -649,10 +652,12 @@ def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
   conv_and_side_inputs = conv_result + side_input_scale * NchwVectCToNchw(
       gen_array_ops.dequantize(side_input, -128, 127))
 
-  logit = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")
+  output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")
+  if apply_relu:
+    output = nn_ops.relu(output)
 
   result, _, _ = gen_array_ops.quantize_v2(
-      NchwToNchwVectC(nn_ops.relu(logit)), -128, 127, dtypes.qint8)
+      NchwToNchwVectC(output), -128, 127, dtypes.qint8)
 
   return result
 
@@ -795,7 +800,7 @@ class FusedConvInt8Tests(test.TestCase):
       },
   ]
 
-  def runTest(self, test_param):
+  def runTest(self, test_param, apply_relu):
     batch_size = test_param["batch_size"]
     input_channels = test_param["input_channels"]
     output_channels = test_param["output_channels"]
@@ -831,8 +836,8 @@ class FusedConvInt8Tests(test.TestCase):
                                                vertical_stride, padding_type)
     output_width = CalculateConvolvedOutputDim(input_width, filter_width,
                                                horizontal_stride, padding_type)
-    tf_logging.info("output_height=", output_height, ", output_width=",
-                    output_width)
+    tf_logging.info("output_height=", output_height, ", output_width=",
+                    output_width)
 
     side_input, _, _ = gen_array_ops.quantize_v2(
         random_ops.random_uniform(
@@ -858,12 +863,13 @@ class FusedConvInt8Tests(test.TestCase):
         conv_input_scale=conv_input_scale,
         side_input_scale=side_input_scale,
         side_input=side_input,
+        activation_mode="Relu" if apply_relu else "None",
         data_format="NCHW_VECT_C",
         filter_format="OIHW_VECT_I")
 
     expected = SimulateFusedConv2dBiasActivationInt8(
         conv_input_scale, conv_input, kernel, padding_type, strides,
-        side_input_scale, side_input, biases)
+        side_input_scale, side_input, biases, apply_relu)
 
     with self.test_session(use_gpu=True) as sess:
       actual_y, expected_y = sess.run([actual, expected])
@@ -877,8 +883,9 @@ class FusedConvInt8Tests(test.TestCase):
       tf_logging.info("int8 test skipped because not run with --config=cuda or "
                       "no GPUs with compute capability >= 6.1 are available.")
       return
-    for test_param in self._test_params:
-      self.runTest(test_param)
+    for apply_relu in [True, False]:
+      for test_param in self._test_params:
+        self.runTest(test_param, apply_relu)
 
 
 if __name__ == "__main__":