about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/stream.h
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org>, 2017-09-06 13:20:30 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2017-09-06 13:24:41 -0700
commit: 2b15badd96c651d4d191426975a1773dff4a03b8 (patch)
tree: 30406a237f324cb3993c8fbb2c49dd0c1f9ed624 /tensorflow/stream_executor/stream.h
parent: ca65468a02d4b2ceb78cf5c130ad275a4eefe6bb (diff)
Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.

PiperOrigin-RevId: 167763219
Diffstat (limited to 'tensorflow/stream_executor/stream.h')
-rw-r--r-- tensorflow/stream_executor/stream.h | 98
1 file changed, 74 insertions, 24 deletions
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 9bd4c21a66..a418fe961c 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -240,15 +240,17 @@ class Stream {
DeviceMemory<float> *offset_backprop);
// TODO(leary) add double-precision version of this interface.
- Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<float> &input_data,
- const dnn::FilterDescriptor &filter_descriptor,
- const DeviceMemory<float> &filter_data,
- const dnn::ConvolutionDescriptor &convolution_descriptor,
- const DeviceMemory<float> &biases,
- dnn::ActivationMode activation_mode,
- const dnn::BatchDescriptor &output_descriptor,
- DeviceMemory<float> *output);
+ Stream &ThenFusedConvolve(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<int8> &conv_input_data, float conv_input_scale,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_data,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<int8> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor &output_descriptor,
+ DeviceMemory<int8> *output);
Stream &ThenConvolve(const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
@@ -278,23 +280,39 @@ class Stream {
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output_data);
- Stream &ThenConvolveWithScratch(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<Eigen::half> &input_data,
+ Stream &ThenFusedConvolveWithScratch(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<int8> &conv_input_data, float conv_input_scale,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_data,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<int8> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output,
+ ScratchAllocator *scratch_allocator);
+
+ Stream &ThenFusedConvolveWithScratch(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator);
- Stream &ThenConvolveWithScratch(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<float> &input_data,
+ Stream &ThenFusedConvolveWithScratch(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<float> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<float> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator);
@@ -323,7 +341,6 @@ class Stream {
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
- const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
@@ -335,35 +352,68 @@ class Stream {
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
- const DeviceMemory<Eigen::half> &biases,
- dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result);
- Stream &ThenConvolveWithAlgorithm(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<float> &input_data,
+ Stream &ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<double> &conv_input_data, double conv_input_scale,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<double> &filter_data,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<double> &side_input_data, double side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<double> &biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor &output_descriptor,
+ DeviceMemory<double> *output, ScratchAllocator *scratch_allocator,
+ const dnn::AlgorithmConfig &algorithm_config,
+ dnn::ProfileResult *output_profile_result);
+
+ Stream &ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<float> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<float> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result);
- Stream &ThenConvolveWithAlgorithm(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<Eigen::half> &input_data,
+ Stream &ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<Eigen::half> &biases,
+ dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result);
+ Stream &ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<int8> &conv_input_data, float conv_input_scale,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_data,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<int8> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output,
+ ScratchAllocator *scratch_allocator,
+ const dnn::AlgorithmConfig &algorithm_config,
+ dnn::ProfileResult *output_profile_result);
+
Stream &ThenSeparableConvolve(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,