about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/stream.cc
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-08-23 16:58:50 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-08-23 17:02:42 -0700
commit: 1f41602a82cb68fc7bc7e51cf9590a87ee5baf4d (patch)
tree: 329ba37a6761ae9506d94088b9c8d3c2e90d5803 /tensorflow/stream_executor/stream.cc
parent: 2272987f13be76105fcd24dd38cf768c2d4fec0d (diff)
Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.

PiperOrigin-RevId: 166276461
Diffstat (limited to 'tensorflow/stream_executor/stream.cc')
-rw-r--r-- tensorflow/stream_executor/stream.cc 206
1 file changed, 150 insertions, 56 deletions
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index c9b36ba7ab..dc768e0273 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -361,28 +361,66 @@ Stream &Stream::ThenBatchNormalizationBackward(
return *this;
}
-Stream &Stream::ThenConvolveWithScratch(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<Eigen::half> &input_data,
+Stream &Stream::ThenFusedConvolveWithScratch(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<int8> &conv_input_data, float conv_input_scale,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_data,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<int8> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output,
+ ScratchAllocator *scratch_allocator) {
+ VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+ PARAM(conv_input_scale), PARAM(filter_descriptor),
+ PARAM(filter_data), PARAM(convolution_descriptor),
+ PARAM(side_input_data), PARAM(side_input_scale),
+ PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
+ PARAM(output_descriptor), PARAM(output));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoFusedConvolve(
+ this, conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor,
+ side_input_data, side_input_scale, bias_descriptor, biases,
+ activation_mode, output_descriptor, output, scratch_allocator,
+ dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr));
+ } else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenFusedConvolveWithScratch(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator) {
- VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
- PARAM(filter_descriptor), PARAM(filter_data),
- PARAM(convolution_descriptor), PARAM(biases),
- PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
+ VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+ PARAM(conv_input_scale), PARAM(filter_descriptor),
+ PARAM(filter_data), PARAM(convolution_descriptor),
+ PARAM(side_input_data), PARAM(side_input_scale),
+ PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
+ PARAM(output_descriptor), PARAM(output));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
- CheckError(dnn->DoConvolve(
- this, input_descriptor, input_data, filter_descriptor, filter_data,
- convolution_descriptor, biases, activation_mode, output_descriptor,
- output, scratch_allocator, dnn::AlgorithmConfig(),
- /*output_profile_result=*/nullptr));
+ CheckError(dnn->DoFusedConvolve(
+ this, conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor,
+ side_input_data, side_input_scale, bias_descriptor, biases,
+ activation_mode, output_descriptor, output, scratch_allocator,
+ dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -390,27 +428,32 @@ Stream &Stream::ThenConvolveWithScratch(
return *this;
}
-Stream &Stream::ThenConvolveWithScratch(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<float> &input_data,
+Stream &Stream::ThenFusedConvolveWithScratch(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<float> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<float> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output,
ScratchAllocator *scratch_allocator) {
- VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
- PARAM(filter_descriptor), PARAM(filter_data),
- PARAM(convolution_descriptor), PARAM(biases),
- PARAM(activation_mode), PARAM(output_descriptor), PARAM(output));
+ VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+ PARAM(conv_input_scale), PARAM(filter_descriptor),
+ PARAM(filter_data), PARAM(convolution_descriptor),
+ PARAM(side_input_data), PARAM(side_input_scale),
+ PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
+ PARAM(output_descriptor), PARAM(output));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
- CheckError(dnn->DoConvolve(
- this, input_descriptor, input_data, filter_descriptor, filter_data,
- convolution_descriptor, biases, activation_mode, output_descriptor,
- output, scratch_allocator, dnn::AlgorithmConfig(),
- /*output_profile_result=*/nullptr));
+ CheckError(dnn->DoFusedConvolve(
+ this, conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor,
+ side_input_data, side_input_scale, bias_descriptor, biases,
+ activation_mode, output_descriptor, output, scratch_allocator,
+ dnn::AlgorithmConfig(), /*output_profile_result=*/nullptr));
} else {
SetErrorAndLogNoDnnSupport();
}
@@ -472,29 +515,34 @@ Stream &Stream::ThenConvolveWithScratch(
return *this;
}
-Stream &Stream::ThenConvolveWithAlgorithm(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<float> &input_data,
+Stream &Stream::ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<float> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<float> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<float> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output,
ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result) {
- VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
- PARAM(filter_descriptor), PARAM(filter_data),
- PARAM(convolution_descriptor), PARAM(biases),
+ VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+ PARAM(conv_input_scale), PARAM(filter_descriptor),
+ PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases),
+ PARAM(side_input_data), PARAM(side_input_scale),
PARAM(activation_mode), PARAM(output_descriptor), PARAM(output),
PARAM(algorithm_config));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
- auto status = dnn->DoConvolve(
- this, input_descriptor, input_data, filter_descriptor, filter_data,
- convolution_descriptor, biases, activation_mode, output_descriptor,
- output, scratch_allocator, algorithm_config, output_profile_result);
+ auto status = dnn->DoFusedConvolve(
+ this, conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor,
+ side_input_data, side_input_scale, bias_descriptor, biases,
+ activation_mode, output_descriptor, output, scratch_allocator,
+ algorithm_config, output_profile_result);
if (!status && !output_profile_result) {
SetError();
}
@@ -505,30 +553,73 @@ Stream &Stream::ThenConvolveWithAlgorithm(
return *this;
}
-Stream &Stream::ThenConvolveWithAlgorithm(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<Eigen::half> &input_data,
+Stream &Stream::ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<Eigen::half> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
const DeviceMemory<Eigen::half> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<Eigen::half> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<Eigen::half> &biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<Eigen::half> *output, ScratchAllocator *scratch_allocator,
const dnn::AlgorithmConfig &algorithm_config,
dnn::ProfileResult *output_profile_result) {
- VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
- PARAM(filter_descriptor), PARAM(filter_data),
- PARAM(convolution_descriptor), PARAM(biases),
- PARAM(activation_mode), PARAM(output_descriptor), PARAM(output),
- PARAM(algorithm_config));
+ VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+ PARAM(conv_input_scale), PARAM(filter_descriptor),
+ PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases),
+ PARAM(side_input_data), PARAM(side_input_scale),
+ PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
+ PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config));
if (ok()) {
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
- auto status = dnn->DoConvolve(
- this, input_descriptor, input_data, filter_descriptor, filter_data,
- convolution_descriptor, biases, activation_mode, output_descriptor,
- output, scratch_allocator, algorithm_config, output_profile_result);
+ auto status = dnn->DoFusedConvolve(
+ this, conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor,
+ side_input_data, side_input_scale, bias_descriptor, biases,
+ activation_mode, output_descriptor, output, scratch_allocator,
+ algorithm_config, output_profile_result);
+ if (!status && !output_profile_result) {
+ SetError();
+ }
+ } else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenFusedConvolveWithAlgorithm(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<int8> &conv_input_data, float conv_input_scale,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_data,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<int8> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
+ const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
+ const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output,
+ ScratchAllocator *scratch_allocator,
+ const dnn::AlgorithmConfig &algorithm_config,
+ dnn::ProfileResult *output_profile_result) {
+ VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data),
+ PARAM(conv_input_scale), PARAM(filter_descriptor),
+ PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases),
+ PARAM(side_input_data), PARAM(side_input_scale),
+ PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
+ PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ auto status = dnn->DoFusedConvolve(
+ this, conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor,
+ side_input_data, side_input_scale, bias_descriptor, biases,
+ activation_mode, output_descriptor, output, scratch_allocator,
+ algorithm_config, output_profile_result);
if (!status && !output_profile_result) {
SetError();
}
@@ -601,19 +692,22 @@ Stream &Stream::ThenConvolveWithAlgorithm(
return *this;
}
-Stream &Stream::ThenConvolve(
- const dnn::BatchDescriptor &input_descriptor,
- const DeviceMemory<float> &input_data,
+Stream &Stream::ThenFusedConvolve(
+ const dnn::BatchDescriptor &conv_input_descriptor,
+ const DeviceMemory<int8> &conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor &filter_descriptor,
- const DeviceMemory<float> &filter_data,
+ const DeviceMemory<int8> &filter_data,
const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const DeviceMemory<int8> &side_input_data, float side_input_scale,
+ const dnn::BatchDescriptor &bias_descriptor,
const DeviceMemory<float> &biases, dnn::ActivationMode activation_mode,
- const dnn::BatchDescriptor &output_descriptor,
- DeviceMemory<float> *output) {
- return ThenConvolveWithScratch(
- input_descriptor, input_data, filter_descriptor, filter_data,
- convolution_descriptor, biases, activation_mode, output_descriptor,
- output, /*scratch_allocator=*/nullptr);
+ const dnn::BatchDescriptor &output_descriptor, DeviceMemory<int8> *output) {
+ return ThenFusedConvolveWithScratch(
+ conv_input_descriptor, conv_input_data, conv_input_scale,
+ filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+ side_input_scale, bias_descriptor, biases, activation_mode,
+ output_descriptor, output,
+ /*scratch_allocator=*/nullptr);
}
Stream &Stream::ThenConvolve(