diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-09-06 13:20:30 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-09-06 13:24:41 -0700 |
commit | 2b15badd96c651d4d191426975a1773dff4a03b8 (patch) | |
tree | 30406a237f324cb3993c8fbb2c49dd0c1f9ed624 /tensorflow/stream_executor/dnn.h | |
parent | ca65468a02d4b2ceb78cf5c130ad275a4eefe6bb (diff) |
Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.
PiperOrigin-RevId: 167763219
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 139 |
1 file changed, 105 insertions, 34 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 0a0ad7d9fb..0a4525c1b7 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -669,6 +669,7 @@ class PoolingDescriptor { typedef int64 AlgorithmType; constexpr AlgorithmType kDefaultAlgorithm = -1; +constexpr AlgorithmType kNoSuitableAlgorithmFound = -2; // Describes the result from a perf experiment. // @@ -912,20 +913,32 @@ class DnnSupport { return false; } - // Enqueues a single-precision convolution operation onto the stream. + // Enqueues a fused convolution operation onto the stream. + // We provide several variants with different types for inputs, biases and + // scaling parameters. // // Arguments (all borrowed): // stream: borrowed pointer to the stream that the 'convolve' operation // should be enqueued onto. - // input_descriptor: dimensions of the input layer. - // input_data: un-owned device memory region which contains the + // conv_input_descriptor: dimensions of the convolution input layer. + // conv_input_data: un-owned device memory region which contains the // convolution input. + // conv_input_scale: a floating point scale to multiply with each element + // of conv_input_data. // filter_descriptor: dimensions of the convolution filter. + // filter_data: un-owned device memory region which contains the + // convolution filter weights. // convolution_descriptor: stride of the convolution filter. // biases: un-owned device memory region containing biases to add to the - // input. This can be DeviceMemory pointing to NULL only when activation_mode - // is kNone. + // input. // activation_mode: Type of activation to perform. + // side_input_data: un-owned device memory region which contains optional + // side input data. If 'side_input_scale' is non-zero, then this must + // point to data in the tensor shape specified by output_shape. 
+ // It will be scaled by 'side_input_scale' and added to the convolution + // result and bias prior to applying the activation function. + // side_input_scale: a floating point scale to multiply with each element + // of side_input_data. // output_descriptor: dimensions of the output layer. // output_data: un-owned device memory region in which to place the // convolution result. @@ -938,7 +951,7 @@ class DnnSupport { // output_profile_result: the output profile result for this call. The // profiling is only enabled when this is not nullptr. // - // input_descriptor, filter_descriptor, convolution_descriptor and + // conv_input_descriptor, filter_descriptor, convolution_descriptor and // output_descriptor together specify exactly how the convolution is aligned // with the input data: // @@ -952,55 +965,115 @@ class DnnSupport { // that if the inverse of the filter is applied to the output in VALID mode // the result is the same size as the input - this requires even more // padding of the input. 
- virtual bool DoConvolve( - Stream* stream, const dnn::BatchDescriptor& input_descriptor, - const DeviceMemory<float>& input_data, + virtual bool DoFusedConvolve( + Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, + const DeviceMemory<double>& conv_input_data, double conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, - const DeviceMemory<float>& filter_data, + const DeviceMemory<double>& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, - const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode, + const DeviceMemory<double>& side_input_data, double side_input_scale, + const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor& output_descriptor, - DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator, + DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, - ProfileResult* output_profile_result) { + dnn::ProfileResult* output_profile_result) { return false; } - // Enqueues a double-precision fused convolution, bias add, and activation - // operation onto the stream. See DoConvolve above for argument details. - virtual bool DoConvolve( - Stream* stream, const dnn::BatchDescriptor& batch_descriptor, - const DeviceMemory<double>& input_data, + // This is the float version of DoFusedConvolve. 
+ virtual bool DoFusedConvolve( + Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, + const DeviceMemory<float>& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, - const DeviceMemory<double>& filter_data, + const DeviceMemory<float>& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, - const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode, + const DeviceMemory<float>& side_input_data, float side_input_scale, + const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor& output_descriptor, - DeviceMemory<double>* output_data) { + DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) { return false; } - // Enqueues a half-precision fused convolution, bias add, and activation - // operation onto the stream. See DoConvolve above for argument details. - virtual bool DoConvolve( - Stream* stream, const dnn::BatchDescriptor& batch_descriptor, - const DeviceMemory<Eigen::half>& input_data, + // This is the Eigen::half version of DoFusedConvolve. + // The scaling parameters are still floats. 
+ virtual bool DoFusedConvolve( + Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, + const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory<Eigen::half>& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, + const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale, + const dnn::BatchDescriptor& bias_descriptor, const DeviceMemory<Eigen::half>& biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor& output_descriptor, DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, - ProfileResult* output_profile_result) { + dnn::ProfileResult* output_profile_result) { return false; } - // Enqueues a single-precision convolution operation (without bias add - // or activation) onto the stream. - // See DoConvolve above for argument details. + // This is the int8 version of DoFusedConvolve. + // The bias input and scaling parameters are floats. + virtual bool DoFusedConvolve( + Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, + const DeviceMemory<int8>& conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int8>& filter_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const DeviceMemory<int8>& side_input_data, float side_input_scale, + const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) { + return false; + } + + // Enqueues a single-precision convolution operation onto the stream. 
+ // + // Arguments (all borrowed): + // stream: borrowed pointer to the stream that the 'convolve' operation + // should be enqueued onto. + // input_descriptor: dimensions of the input layer. + // input_data: un-owned device memory region which contains the + // convolution input. + // filter_descriptor: dimensions of the convolution filter. + // convolution_descriptor: stride of the convolution filter. + // output_descriptor: dimensions of the output layer. + // output_data: un-owned device memory region in which to place the + // convolution result. + // scratch_allocator: un-owned, may-be-null object that may allocate scratch + // space in order to speed up the convolution operation. + // algorithm: an integer to specify which algorithm should be used for the + // operation. kDefaultAlgorithm means the system will pick an algorithm + // by default. The coding of the algorithm is to be interpreted by the + // underlying implementation. + // output_profile_result: the output profile result for this call. The + // profiling is only enabled when this is not nullptr. + // + // input_descriptor, filter_descriptor, convolution_descriptor and + // output_descriptor together specify exactly how the convolution is aligned + // with the input data: + // + // * (input dimensions - filter size + 1) / filter stride == output dimensions + // corresponds to dist_belief padding = VALID, i.e. the input is not padded. + // * input dimensions / filter stride == output dimensions + // corresponds to dist_belief padding = SAME, i.e. input and output are the + // same size - this requires padding the input. + // * (input dimensions + filter size - 1) / filter stride == output dimensions + // corresponds to dist_belief padding = FULL, i.e. 
the output is sized so + // that if the inverse of the filter is applied to the output in VALID mode + // the result is the same size as the input - this requires even more + // padding of the input. virtual bool DoConvolve( Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory<float>& input_data, @@ -1012,8 +1085,7 @@ class DnnSupport { const dnn::AlgorithmConfig& algorithm_config, ProfileResult* output_profile_result) = 0; - // Enqueues a double-precision convolution operation (without bias add - // or activation) onto the stream. + // Enqueues a double-precision convolution operation onto the stream. // See DoConvolve above for argument details. virtual bool DoConvolve( Stream* stream, const dnn::BatchDescriptor& batch_descriptor, @@ -1024,8 +1096,7 @@ class DnnSupport { const dnn::BatchDescriptor& output_descriptor, DeviceMemory<double>* output_data) = 0; - // Enqueues a half-precision convolution operation (without bias add - // or activation) onto the stream. + // Enqueues a half-precision convolution operation onto the stream. // See DoConvolve above for argument details. virtual bool DoConvolve( Stream* stream, const dnn::BatchDescriptor& batch_descriptor, |