about summary refs log tree commit diff homepage
path: root/tensorflow/stream_executor/dnn.h
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-08-24 00:00:52 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-08-24 00:04:32 -0700
commit: 91b00a110feb83b7307ea9c280142007090f3cd9 (patch)
tree: 65b55fce64f9685476deca665686588def6e95b0 /tensorflow/stream_executor/dnn.h
parent: 2f531b54ce51fdb9fc2b055548e534a13624ea93 (diff)
Automated g4 rollback of changelist 166276461
PiperOrigin-RevId: 166305887
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r-- tensorflow/stream_executor/dnn.h 139
1 files changed, 34 insertions, 105 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0a4525c1b7..0a0ad7d9fb 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -669,7 +669,6 @@ class PoolingDescriptor {
typedef int64 AlgorithmType;
constexpr AlgorithmType kDefaultAlgorithm = -1;
-constexpr AlgorithmType kNoSuitableAlgorithmFound = -2;
// Describes the result from a perf experiment.
//
@@ -913,32 +912,20 @@ class DnnSupport {
return false;
}
- // Enqueues a fused convolution operation onto the stream.
- // We provide several variants with different types for inputs, biases and
- // scaling parameters.
+ // Enqueues a single-precision convolution operation onto the stream.
//
// Arguments (all borrowed):
// stream: borrowed pointer to the stream that the 'convolve' operation
// should be enqueued onto.
- // conv_input_descriptor: dimensions of the convolution input layer.
- // conv_input_data: un-owned device memory region which contains the
+ // input_descriptor: dimensions of the input layer.
+ // input_data: un-owned device memory region which contains the
// convolution input.
- // conv_input_scale: a floating point scale to multiply with each element
- // of conv_input_data.
// filter_descriptor: dimensions of the convolution filter.
- // filter_data: un-owned device memory region which contains the
- // convolution filter weights.
// convolution_descriptor: stride of the convolution filter.
// biases: un-owned device memory region containing biases to add to the
- // input.
+ // input. This can be DeviceMemory pointing to NULL only when activation_mode
+ // is kNone.
// activation_mode: Type of activation to perform.
- // side_input_data: un-owned device memory region which contains optional
- // side input data. If 'side_input_scale' is non-zero, then this must
- // point to data in the tensor shape specified by output_shape.
- // It will be scaled by 'side_input_scale' and added to the convolution
- // result and bias prior to applying the activation function.
- // side_input_scale: a floating point scale to multiply with each element
- // of side_input_data.
// output_descriptor: dimensions of the output layer.
// output_data: un-owned device memory region in which to place the
// convolution result.
@@ -951,7 +938,7 @@ class DnnSupport {
// output_profile_result: the output profile result for this call. The
// profiling is only enabled when this is not nullptr.
//
- // conv_input_descriptor, filter_descriptor, convolution_descriptor and
+ // input_descriptor, filter_descriptor, convolution_descriptor and
// output_descriptor together specify exactly how the convolution is aligned
// with the input data:
//
@@ -965,115 +952,55 @@ class DnnSupport {
// that if the inverse of the filter is applied to the output in VALID mode
// the result is the same size as the input - this requires even more
// padding of the input.
- virtual bool DoFusedConvolve(
- Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
- const DeviceMemory<double>& conv_input_data, double conv_input_scale,
+ virtual bool DoConvolve(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
- const DeviceMemory<double>& filter_data,
+ const DeviceMemory<float>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
- const DeviceMemory<double>& side_input_data, double side_input_scale,
- const dnn::BatchDescriptor& bias_descriptor,
- const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
+ const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
- DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
+ DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
- dnn::ProfileResult* output_profile_result) {
+ ProfileResult* output_profile_result) {
return false;
}
- // This is the float version of DoFusedConvolve.
- virtual bool DoFusedConvolve(
- Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
- const DeviceMemory<float>& conv_input_data, float conv_input_scale,
+ // Enqueues a double-precision fused convolution, bias add, and activation
+ // operation onto the stream. See DoConvolve above for argument details.
+ virtual bool DoConvolve(
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
+ const DeviceMemory<double>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
- const DeviceMemory<float>& filter_data,
+ const DeviceMemory<double>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
- const DeviceMemory<float>& side_input_data, float side_input_scale,
- const dnn::BatchDescriptor& bias_descriptor,
- const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
+ const DeviceMemory<double>& biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
- DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
- const dnn::AlgorithmConfig& algorithm_config,
- dnn::ProfileResult* output_profile_result) {
+ DeviceMemory<double>* output_data) {
return false;
}
- // This is the Eigen::half version of DoFusedConvolve.
- // The scaling parameters are still floats.
- virtual bool DoFusedConvolve(
- Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
- const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
+ // Enqueues a half-precision fused convolution, bias add, and activation
+ // operation onto the stream. See DoConvolve above for argument details.
+ virtual bool DoConvolve(
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
+ const DeviceMemory<Eigen::half>& input_data,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
- const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale,
- const dnn::BatchDescriptor& bias_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
- dnn::ProfileResult* output_profile_result) {
- return false;
- }
-
- // This is the int8 version of DoFusedConvolve.
- // The bias input and scaling parameters are floats.
- virtual bool DoFusedConvolve(
- Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
- const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
- const dnn::FilterDescriptor& filter_descriptor,
- const DeviceMemory<int8>& filter_data,
- const dnn::ConvolutionDescriptor& convolution_descriptor,
- const DeviceMemory<int8>& side_input_data, float side_input_scale,
- const dnn::BatchDescriptor& bias_descriptor,
- const DeviceMemory<float>& biases, dnn::ActivationMode activation_mode,
- const dnn::BatchDescriptor& output_descriptor,
- DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator,
- const dnn::AlgorithmConfig& algorithm_config,
- dnn::ProfileResult* output_profile_result) {
+ ProfileResult* output_profile_result) {
return false;
}
- // Enqueues a single-precision convolution operation onto the stream.
- //
- // Arguments (all borrowed):
- // stream: borrowed pointer to the stream that the 'convolve' operation
- // should be enqueued onto.
- // input_descriptor: dimensions of the input layer.
- // input_data: un-owned device memory region which contains the
- // convolution input.
- // filter_descriptor: dimensions of the convolution filter.
- // convolution_descriptor: stride of the convolution filter.
- // input. This can be DeviceMemory pointing to NULL only when activation_mode
- // is kNone.
- // output_descriptor: dimensions of the output layer.
- // output_data: un-owned device memory region in which to place the
- // convolution result.
- // scratch_allocator: un-owned, may-be-null object that may allocate scratch
- // space in order to speed up the convolution operation.
- // algorithm: an integer to specify which algorithm should be used for the
- // operation. kDefaultAlgorithm means the system will pick an algorithm
- // by default. The coding of the algorithm is be interpretted by the
- // underlying implementation.
- // output_profile_result: the output profile result for this call. The
- // profiling is only enabled when this is not nullptr.
- //
- // input_descriptor, filter_descriptor, convolution_descriptor and
- // output_descriptor together specify exactly how the convolution is aligned
- // with the input data:
- //
- // * (input dimensions - filter size + 1) / filter stride == output dimensions
- // corresponds to dist_belief padding = VALID, i.e. the input is not padded.
- // * input dimensions / filter stride == output dimensions
- // corresponds to dist_belief padding = SAME, i.e. input and output are the
- // same size - this requires padding the input.
- // * (input dimensions + filter size - 1) / filter stride == output dimensions
- // corresponds to dist_belief padding = FULL, i.e. the output is sized so
- // that if the inverse of the filter is applied to the output in VALID mode
- // the result is the same size as the input - this requires even more
- // padding of the input.
+ // Enqueues a single-precision convolution operation (without bias add
+ // or activation) onto the stream.
+ // See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
@@ -1085,7 +1012,8 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
- // Enqueues a double-precision convolution operation onto the stream.
+ // Enqueues a double-precision convolution operation (without bias add
+ // or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@@ -1096,7 +1024,8 @@ class DnnSupport {
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double>* output_data) = 0;
- // Enqueues a half-precision convolution operation onto the stream.
+ // Enqueues a half-precision convolution operation (without bias add
+ // or activation) onto the stream.
// See DoConvolve above for argument details.
virtual bool DoConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,