diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-09-06 13:20:30 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-09-06 13:24:41 -0700 |
commit | 2b15badd96c651d4d191426975a1773dff4a03b8 (patch) | |
tree | 30406a237f324cb3993c8fbb2c49dd0c1f9ed624 /tensorflow/stream_executor/stream.h | |
parent | ca65468a02d4b2ceb78cf5c130ad275a4eefe6bb (diff) |
Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.
PiperOrigin-RevId: 167763219
Diffstat (limited to 'tensorflow/stream_executor/stream.h')
-rw-r--r-- | tensorflow/stream_executor/stream.h | 98 |
1 file changed, 74 insertions, 24 deletions
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index 9bd4c21a66..a418fe961c 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -240,15 +240,17 @@ class Stream { DeviceMemory<float> *offset_backprop); // TODO(leary) add double-precision version of this interface. - Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<float> &input_data, - const dnn::FilterDescriptor &filter_descriptor, - const DeviceMemory<float> &filter_data, - const dnn::ConvolutionDescriptor &convolution_descriptor, - const DeviceMemory<float> &biases, - dnn::ActivationMode activation_mode, - const dnn::BatchDescriptor &output_descriptor, - DeviceMemory<float> *output); + Stream &ThenFusedConvolve( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<int8> &conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<int8> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory<int8> *output); Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor, const DeviceMemory<float> &input_data, @@ -278,23 +280,39 @@ class Stream { const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output_data); - Stream &ThenConvolveWithScratch( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<Eigen::half> &input_data, + Stream &ThenFusedConvolveWithScratch( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<int8> &conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_data, + const dnn::ConvolutionDescriptor 
&convolution_descriptor, + const DeviceMemory<int8> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output, + ScratchAllocator *scratch_allocator); + + Stream &ThenFusedConvolveWithScratch( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<Eigen::half> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<Eigen::half> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator); - Stream &ThenConvolveWithScratch( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<float> &input_data, + Stream &ThenFusedConvolveWithScratch( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<float> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<float> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<float> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output, ScratchAllocator *scratch_allocator); @@ -323,7 +341,6 @@ class Stream { const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<float> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, - const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor 
&output_descriptor, DeviceMemory<float> *output, ScratchAllocator *scratch_allocator, const dnn::AlgorithmConfig &algorithm_config, @@ -335,35 +352,68 @@ class Stream { const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<Eigen::half> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, - const DeviceMemory<Eigen::half> &biases, - dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator, const dnn::AlgorithmConfig &algorithm_config, dnn::ProfileResult *output_profile_result); - Stream &ThenConvolveWithAlgorithm( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<float> &input_data, + Stream &ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<double> &conv_input_data, double conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<double> &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<double> &side_input_data, double side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<double> &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory<double> *output, ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result); + + Stream &ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<float> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<float> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<float> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor 
&output_descriptor, DeviceMemory<float> *output, ScratchAllocator *scratch_allocator, const dnn::AlgorithmConfig &algorithm_config, dnn::ProfileResult *output_profile_result); - Stream &ThenConvolveWithAlgorithm( - const dnn::BatchDescriptor &input_descriptor, - const DeviceMemory<Eigen::half> &input_data, + Stream &ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale, const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory<Eigen::half> &filter_data, const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<Eigen::half> &biases, + dnn::ActivationMode activation_mode, const dnn::BatchDescriptor &output_descriptor, DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator, const dnn::AlgorithmConfig &algorithm_config, dnn::ProfileResult *output_profile_result); + Stream &ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory<int8> &conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory<int8> &side_input_data, float side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result); + Stream &ThenSeparableConvolve( const dnn::BatchDescriptor &input_descriptor, const DeviceMemory<float> &input_data, |