path: root/tensorflow/stream_executor/dnn.h
author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-09-06 13:20:30 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-09-06 13:24:41 -0700
commit    2b15badd96c651d4d191426975a1773dff4a03b8 (patch)
tree      30406a237f324cb3993c8fbb2c49dd0c1f9ed624 /tensorflow/stream_executor/dnn.h
parent    ca65468a02d4b2ceb78cf5c130ad275a4eefe6bb (diff)
Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.

PiperOrigin-RevId: 167763219
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r--  tensorflow/stream_executor/dnn.h  | 139
1 file changed, 105 insertions(+), 34 deletions(-)
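
Before the diff itself: the DoFusedConvolve overloads added below document a scaled convolution combined with an optional scaled side input, a bias add, and an activation. The following scalar sketch restates the per-element arithmetic implied by that argument documentation; the ReLU choice and the int8 saturation step are illustrative assumptions, not part of this header.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-element sketch of the fused computation documented below:
//   output = activation(conv_input_scale * conv(conv_input, filter)
//                       + side_input_scale * side_input + bias)
// 'accumulator' stands for the raw convolution sum for one output element;
// because convolution is linear, scaling each input element by
// conv_input_scale is shown here as scaling the accumulated sum.
float FusedElement(float accumulator, float conv_input_scale, float bias,
                   float side_input, float side_input_scale) {
  const float pre_activation =
      conv_input_scale * accumulator + side_input_scale * side_input + bias;
  return std::max(0.0f, pre_activation);  // assuming kRelu activation
}

// The int8 variant would additionally clamp and round the float result into
// the int8 range (an assumption about the quantized path, not from this header).
int8_t SaturateToInt8(float value) {
  return static_cast<int8_t>(
      std::lround(std::min(127.0f, std::max(-128.0f, value))));
}
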
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0a0ad7d9fb..0a4525c1b7 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -669,6 +669,7 @@ class PoolingDescriptor {
typedef int64 AlgorithmType;
constexpr AlgorithmType kDefaultAlgorithm = -1;
+constexpr AlgorithmType kNoSuitableAlgorithmFound = -2;
// Describes the result from a perf experiment.
//
@@ -912,20 +913,32 @@ class DnnSupport {
return false;
}
- // Enqueues a single-precision convolution operation onto the stream.
+ // Enqueues a fused convolution operation onto the stream.
+ // We provide several variants with different types for inputs, biases and
+ // scaling parameters.
//
// Arguments (all borrowed):
// stream: borrowed pointer to the stream that the 'convolve' operation
// should be enqueued onto.
- // input_descriptor: dimensions of the input layer.
- // input_data: un-owned device memory region which contains the
+ // conv_input_descriptor: dimensions of the convolution input layer.
+ // conv_input_data: un-owned device memory region which contains the
// convolution input.
+ // conv_input_scale: a floating point scale to multiply with each element
+ // of conv_input_data.
// filter_descriptor: dimensions of the convolution filter.
+ // filter_data: un-owned device memory region which contains the
+ // convolution filter weights.
// convolution_descriptor: stride of the convolution filter.
// biases: un-owned device memory region containing biases to add to the
- // input. This can be DeviceMemory pointing to NULL only when activation_mode
- // is kNone.
+ // input.
// activation_mode: Type of activation to perform.
+ // side_input_data: un-owned device memory region which contains optional
+ // side input data. If 'side_input_scale' is non-zero, then this must
+ // point to data in the tensor shape specified by output_shape.
+ // It will be scaled by 'side_input_scale' and added to the convolution
+ // result and bias prior to applying the activation function.
+ // side_input_scale: a floating point scale to multiply with each element
+ // of side_input_data.
// output_descriptor: dimensions of the output layer.
// output_data: un-owned device memory region in which to place the
// convolution result.
@@ -938,7 +951,7 @@ class DnnSupport {
// output_profile_result: the output profile result for this call. The
// profiling is only enabled when this is not nullptr.
//
- // input_descriptor, filter_descriptor, convolution_descriptor and
+ // conv_input_descriptor, filter_descriptor, convolution_descriptor and
// output_descriptor together specify exactly how the convolution is aligned
// with the input data:
//
@@ -952,55 +965,115 @@ class DnnSupport {
// that if the inverse of the filter is applied to the output in VALID mode
// the result is the same size as the input - this requires even more
// padding of the input.
- virtual bool DoConvolve(
- Stream* stream, const dnn::BatchDescriptor& input_descriptor,
- const DeviceMemory<float>& input_data,
+ virtual bool DoFusedConvolve(
+ Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
+ const DeviceMemory<double>& conv_input_data, double conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
- const DeviceMemory<float>& filter_data,
+ const DeviceMemory<double>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
- const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
+ const DeviceMemory<double>& side_input_data, double side_input_scale,
+ const dnn::BatchDescriptor& bias_descriptor,
+ const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
- DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
+ DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
- ProfileResult* output_profile_result) {
+ dnn::ProfileResult* output_profile_result) {
return false;
}
- // Enqueues a double-precision fused convolution, bias add, and activation
- // operation onto the stream. See DoConvolve above for argument details.
- virtual bool DoConvolve(
- Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
- const DeviceMemory<double>& input_data,
+ // This is the float version of DoFusedConvolve.
+ virtual bool DoFusedConvolve(
+ Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
+ const DeviceMemory<float>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
- const DeviceMemory<double>& filter_data,
+ const DeviceMemory<float>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
- const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
+ const DeviceMemory<float>& side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor& bias_descriptor,
+ const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
- DeviceMemory<double>* output_data) {
+ DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
+ const dnn::AlgorithmConfig& algorithm_config,
+ dnn::ProfileResult* output_profile_result) {
return false;
}
- // Enqueues a half-precision fused convolution, bias add, and activation
- // operation onto the stream. See DoConvolve above for argument details.
- virtual bool DoConvolve(
- Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
- const DeviceMemory<Eigen::half>& input_data,
+ // This is the Eigen::half version of DoFusedConvolve.
+ // The scaling parameters are still floats.
+ virtual bool DoFusedConvolve(
+ Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
+ const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor& bias_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
- ProfileResult* output_profile_result) {
+ dnn::ProfileResult* output_profile_result) {
return false;
}
- // Enqueues a single-precision convolution operation (without bias add
- // or activation) onto the stream.
- // See DoConvolve above for argument details.
+ // This is the int8 version of DoFusedConvolve.
+ // The bias input and scaling parameters are floats.
+ virtual bool DoFusedConvolve(
+ Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
+ const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int8>& filter_data,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const DeviceMemory<int8>& side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor& bias_descriptor,
+ const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator,
+ const dnn::AlgorithmConfig& algorithm_config,
+ dnn::ProfileResult* output_profile_result) {
+ return false;
+ }
+
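+
For reference, a hypothetical caller of the int8 overload declared above. The argument order is taken from the declaration; the enclosing perftools::gputools namespace, the helper name, and the concrete scale and activation values are assumptions for illustration, and all descriptors and device buffers are assumed to be configured elsewhere.

#include "tensorflow/stream_executor/dnn.h"

namespace perftools {
namespace gputools {

// Sketch of invoking the int8 DoFusedConvolve overload; int8 here is the
// stream_executor integer typedef used by DeviceMemory<int8> in dnn.h.
bool RunInt8FusedConv(dnn::DnnSupport* dnn_support, Stream* stream,
                      const dnn::BatchDescriptor& conv_input_desc,
                      const DeviceMemory<int8>& conv_input,
                      const dnn::FilterDescriptor& filter_desc,
                      const DeviceMemory<int8>& filter,
                      const dnn::ConvolutionDescriptor& conv_desc,
                      const DeviceMemory<int8>& side_input,
                      const dnn::BatchDescriptor& bias_desc,
                      const DeviceMemory<float>& biases,
                      const dnn::BatchDescriptor& output_desc,
                      DeviceMemory<int8>* output,
                      ScratchAllocator* scratch_allocator) {
  // conv_input_scale rescales each input element; a zero side_input_scale
  // means the optional side input is ignored (illustrative values).
  const float conv_input_scale = 1.0f;
  const float side_input_scale = 0.0f;
  return dnn_support->DoFusedConvolve(
      stream, conv_input_desc, conv_input, conv_input_scale, filter_desc,
      filter, conv_desc, side_input, side_input_scale, bias_desc, biases,
      dnn::ActivationMode::kRelu, output_desc, output, scratch_allocator,
      dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr);
}

}  // namespace gputools
}  // namespace perftools
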
+ // Enqueues a single-precision convolution operation onto the stream.
+ //
+ // Arguments (all borrowed):
+ // stream: borrowed pointer to the stream that the 'convolve' operation
+ // should be enqueued onto.
+ // input_descriptor: dimensions of the input layer.
+ // input_data: un-owned device memory region which contains the
+ // convolution input.
+ // filter_descriptor: dimensions of the convolution filter.
+ // convolution_descriptor: stride of the convolution filter.
+ // output_descriptor: dimensions of the output layer.
+ // output_data: un-owned device memory region in which to place the
+ // convolution result.
+ // scratch_allocator: un-owned, may-be-null object that may allocate scratch
+ // space in order to speed up the convolution operation.
+ // algorithm: an integer to specify which algorithm should be used for the
+ // operation. kDefaultAlgorithm means the system will pick an algorithm
+ // by default. The coding of the algorithm is to be interpreted by the
+ // underlying implementation.
+ // output_profile_result: the output profile result for this call. The
+ // profiling is only enabled when this is not nullptr.
+ //
+ // input_descriptor, filter_descriptor, convolution_descriptor and
+ // output_descriptor together specify exactly how the convolution is aligned
+ // with the input data:
+ //
+ // * (input dimensions - filter size + 1) / filter stride == output dimensions
+ // corresponds to dist_belief padding = VALID, i.e. the input is not padded.
+ // * input dimensions / filter stride == output dimensions
+ // corresponds to dist_belief padding = SAME, i.e. input and output are the
+ // same size - this requires padding the input.
+ // * (input dimensions + filter size - 1) / filter stride == output dimensions
+ // corresponds to dist_belief padding = FULL, i.e. the output is sized so
+ // that if the inverse of the filter is applied to the output in VALID mode
+ // the result is the same size as the input - this requires even more
+ // padding of the input.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
@@ -1012,8 +1085,7 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
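
The VALID/SAME/FULL size relations restated in the comment above can be sanity-checked with concrete numbers. A small sketch, using illustrative extents that are not taken from this header:

#include <cassert>

// Worked check of the three padding relations, for a 1-D input of extent 10,
// a filter of extent 3, and stride 1 (illustrative numbers only).
void PaddingRelationExample() {
  const int input = 10, filter = 3, stride = 1;
  const int valid_output = (input - filter + 1) / stride;  // no input padding
  const int same_output = input / stride;                  // pad so output matches input
  const int full_output = (input + filter - 1) / stride;   // maximal padding
  assert(valid_output == 8 && same_output == 10 && full_output == 12);
}
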
- // Enqueues a double-precision convolution operation (without bias add
- // or activation) onto the stream.
+ // Enqueues a double-precision convolution operation onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@@ -1024,8 +1096,7 @@ class DnnSupport {
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) = 0;
- // Enqueues a half-precision convolution operation (without bias add
- // or activation) onto the stream.
+ // Enqueues a half-precision convolution operation onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,