diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-08-23 16:58:50 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-23 17:02:42 -0700 |
commit | 1f41602a82cb68fc7bc7e51cf9590a87ee5baf4d (patch) | |
tree | 329ba37a6761ae9506d94088b9c8d3c2e90d5803 /tensorflow/stream_executor/stream.cc | |
parent | 2272987f13be76105fcd24dd38cf768c2d4fec0d (diff) |
Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.
PiperOrigin-RevId: 166276461
Diffstat (limited to 'tensorflow/stream_executor/stream.cc')
-rw-r--r-- | tensorflow/stream_executor/stream.cc | 206 |
1 file changed, 150 insertions, 56 deletions
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index c9b36ba7ab..dc768e0273 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -361,28 +361,66 @@ Stream &Stream::ThenBatchNormalizationBackward( return *this; } -Stream &Stream::ThenConvolveWithScratch( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<Eigen::half> &input_data, +Stream &Stream::ThenFusedConvolveWithScratch( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<int8> &conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<int8> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output, + ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), + PARAM(side_input_data), PARAM(side_input_scale), + PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), + PARAM(output_descriptor), PARAM(output)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr)); + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenFusedConvolveWithScratch( + const dnn::BatchDescriptor &conv_input_descriptor, + const 
DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<Eigen::half> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<Eigen::half> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator) { - VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), - PARAM(filter_descriptor), PARAM(filter_data), - PARAM(convolution_descriptor), PARAM(biases), - PARAM(activation_mode), PARAM(output_descriptor), PARAM(output)); + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), + PARAM(side_input_data), PARAM(side_input_scale), + PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), + PARAM(output_descriptor), PARAM(output)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, biases, activation_mode, output_descriptor, - output, scratch_allocator, dnn::AlgorithmConfig(), - /*output_profile_result=*/nullptr)); + CheckError(dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr)); } else { SetErrorAndLogNoDnnSupport(); } @@ -390,27 +428,32 @@ Stream &Stream::ThenConvolveWithScratch( return *this; } -Stream &Stream::ThenConvolveWithScratch( - const dnn::BatchDescriptor &input_descriptor, - const 
DeviceMemory<float> &input_data, +Stream &Stream::ThenFusedConvolveWithScratch( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<float> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<float> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<float> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output, ScratchAllocator *scratch_allocator) { - VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), - PARAM(filter_descriptor), PARAM(filter_data), - PARAM(convolution_descriptor), PARAM(biases), - PARAM(activation_mode), PARAM(output_descriptor), PARAM(output)); + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), + PARAM(side_input_data), PARAM(side_input_scale), + PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), + PARAM(output_descriptor), PARAM(output)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, biases, activation_mode, output_descriptor, - output, scratch_allocator, dnn::AlgorithmConfig(), - /*output_profile_result=*/nullptr)); + CheckError(dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr)); } else { SetErrorAndLogNoDnnSupport(); } @@ -472,29 +515,34 @@ Stream &Stream::ThenConvolveWithScratch( return *this; } 
-Stream &Stream::ThenConvolveWithAlgorithm( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<float> &input_data, +Stream &Stream::ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<float> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<float> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<float> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output, ScratchAllocator *scratch_allocator, const dnn::AlgorithmConfig &algorithm_config, dnn::ProfileResult *output_profile_result) { - VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), - PARAM(filter_descriptor), PARAM(filter_data), - PARAM(convolution_descriptor), PARAM(biases), + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases), + PARAM(side_input_data), PARAM(side_input_scale), PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, biases, activation_mode, output_descriptor, - output, scratch_allocator, algorithm_config, output_profile_result); + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + algorithm_config, output_profile_result); if (!status && !output_profile_result) 
{ SetError(); } @@ -505,30 +553,73 @@ Stream &Stream::ThenConvolveWithAlgorithm( return *this; } -Stream &Stream::ThenConvolveWithAlgorithm( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<Eigen::half> &input_data, +Stream &Stream::ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<Eigen::half> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<Eigen::half> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator, const dnn::AlgorithmConfig &algorithm_config, dnn::ProfileResult *output_profile_result) { - VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), - PARAM(filter_descriptor), PARAM(filter_data), - PARAM(convolution_descriptor), PARAM(biases), - PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), - PARAM(algorithm_config)); + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases), + PARAM(side_input_data), PARAM(side_input_scale), + PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), + PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); if (ok()) { if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, biases, activation_mode, output_descriptor, - output, scratch_allocator, algorithm_config, output_profile_result); + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, 
conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + algorithm_config, output_profile_result); + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<int8> &conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<int8> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result) { + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases), + PARAM(side_input_data), PARAM(side_input_scale), + PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), + PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + algorithm_config, output_profile_result); if (!status && !output_profile_result) { SetError(); } @@ -601,19 +692,22 @@ Stream &Stream::ThenConvolveWithAlgorithm( return 
*this; } -Stream &Stream::ThenConvolve( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<float> &input_data, +Stream &Stream::ThenFusedConvolve( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<int8> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, - const DeviceMemory<float> &filter_data, + const DeviceMemory<int8> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<int8> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, - const dnn::BatchDescriptor &output_descriptor, - DeviceMemory<float> *output) { - return ThenConvolveWithScratch( - input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, biases, activation_mode, output_descriptor, - output, /*scratch_allocator=*/nullptr); + const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output) { + return ThenFusedConvolveWithScratch( + conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output, + /*scratch_allocator=*/nullptr); } Stream &Stream::ThenConvolve( |