-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.h  26
-rw-r--r--  tensorflow/stream_executor/dnn.h            26
-rw-r--r--  tensorflow/stream_executor/stream.cc        60
-rw-r--r--  tensorflow/stream_executor/stream.h         20
4 files changed, 132 insertions(+), 0 deletions(-)
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 8101ebf258..86292bf498 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -164,6 +164,32 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
+ bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int8>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) override {
+ LOG(ERROR) << "DoConvolveQuantized not supported by cuDNN";
+ return false;
+ }
+
+ bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int16>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) override {
+ LOG(ERROR) << "DoConvolveQuantized not supported by cuDNN";
+ return false;
+ }
+
bool DoSeparableConvolve(
Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<float>& input_data,
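
Note that the cuDNN backend only stubs these overloads out: both log an error and return false, and the CheckError call in the Stream wrappers (added in stream.cc below) then leaves the stream in an error state. A minimal, hypothetical caller-side sketch of what that looks like; the helper name, descriptors, and buffers are not part of this change and are assumed to be set up elsewhere:

// Hypothetical helper, illustration only; all types come from the
// stream_executor headers that callers of this API already include.
void TryQuantizedConvolve(Stream* stream,
                          const dnn::BatchDescriptor& input_desc,
                          const DeviceMemory<float>& input_data,
                          const dnn::FilterDescriptor& filter_desc,
                          const DeviceMemory<int8>& quantized_filter,
                          const DeviceMemory<float>& coefficient_scales,
                          const dnn::ConvolutionDescriptor& conv_desc,
                          const dnn::BatchDescriptor& output_desc,
                          DeviceMemory<float>* output_data) {
  stream->ThenConvolveQuantized(input_desc, input_data, filter_desc,
                                quantized_filter, coefficient_scales,
                                conv_desc, output_desc, output_data);
  // On a CUDA/cuDNN-backed executor DoConvolveQuantized returns false, so the
  // stream is flagged as errored and later work enqueued on it is skipped.
  if (!stream->ok()) {
    LOG(ERROR) << "quantized convolution rejected by the DNN backend";
  }
}
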
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 86e56ef186..517f3ea904 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -969,6 +969,32 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
+ // Version of DoConvolve that uses pre-quantized 8 bit coefficients.
+ // coefficient_scales specifies the scaling of each column of coefficients:
+ // original float coefficient[row * num_columns + column] =
+ // quantized coefficient[row * num_columns + column] *
+ // coefficient_scales[column].
+ virtual bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int8>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) = 0;
+
+ // Same as DoConvolveQuantized above, but with int16 filter coefficients.
+ virtual bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int16>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) = 0;
+
// Variation of the above with the weight matrix split into two matrices.
// first_weights: Coefficients of the first matrix.
// second_weights: Coefficients of the second matrix.
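
The coefficient_scales comment above specifies one scale factor per column of the quantized coefficient matrix. As a self-contained, host-side illustration (not part of StreamExecutor, only a sketch of the documented relationship), dequantizing such a matrix would look like this:

#include <cstdint>
#include <vector>

// Recovers float coefficients from int8 quantized ones, one scale per column,
// following: original[row * num_columns + column] =
//            quantized[row * num_columns + column] * scales[column].
std::vector<float> DequantizeCoefficients(const std::vector<int8_t>& quantized,
                                          const std::vector<float>& scales,
                                          int num_rows, int num_columns) {
  std::vector<float> original(quantized.size());
  for (int row = 0; row < num_rows; ++row) {
    for (int column = 0; column < num_columns; ++column) {
      const int index = row * num_columns + column;
      original[index] = static_cast<float>(quantized[index]) * scales[column];
    }
  }
  return original;
}
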
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 980d544b01..7712a3697c 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -468,6 +468,66 @@ Stream &Stream::ThenConvolve(
output, /*scratch_allocator=*/nullptr);
}
+Stream &Stream::ThenConvolveQuantized(
+ const dnn::BatchDescriptor &input_descriptor,
+ const DeviceMemory<float> &input_data,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_coefficients,
+ const DeviceMemory<float> &coefficient_scales,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const dnn::BatchDescriptor &output_descriptor,
+ DeviceMemory<float> *output) {
+ VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
+ PARAM(filter_descriptor), PARAM(filter_coefficients),
+ PARAM(coefficient_scales), PARAM(convolution_descriptor),
+ PARAM(output_descriptor), PARAM(output));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoConvolveQuantized(
+ this, input_descriptor, input_data, filter_descriptor,
+ filter_coefficients, coefficient_scales, convolution_descriptor,
+ output_descriptor, output));
+ } else {
+ SetError();
+ LOG(WARNING)
+ << "attempting to perform DNN operation using StreamExecutor "
+ "without DNN support";
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenConvolveQuantized(
+ const dnn::BatchDescriptor &input_descriptor,
+ const DeviceMemory<float> &input_data,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int16> &filter_coefficients,
+ const DeviceMemory<float> &coefficient_scales,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const dnn::BatchDescriptor &output_descriptor,
+ DeviceMemory<float> *output) {
+ VLOG_CALL(PARAM(input_descriptor), PARAM(input_data),
+ PARAM(filter_descriptor), PARAM(filter_coefficients),
+ PARAM(coefficient_scales), PARAM(convolution_descriptor),
+ PARAM(output_descriptor), PARAM(output));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoConvolveQuantized(
+ this, input_descriptor, input_data, filter_descriptor,
+ filter_coefficients, coefficient_scales, convolution_descriptor,
+ output_descriptor, output));
+ } else {
+ SetError();
+ LOG(WARNING)
+ << "attempting to perform DNN operation using StreamExecutor "
+ "without DNN support";
+ }
+ }
+ return *this;
+}
+
Stream &Stream::ThenSeparableConvolve(
const dnn::BatchDescriptor &batch_descriptor,
const DeviceMemory<float> &input_data,
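
Both wrappers follow the existing Then* pattern: a VLOG_CALL trace, an ok() guard, a lookup of the DnnSupport backend, and CheckError on the result, so the fluent chaining style is preserved. A hypothetical end-to-end use, assuming the existing ThenMemcpy overloads in stream.h and with all descriptors, device buffers, host buffers, and byte sizes configured elsewhere:

// Hypothetical chained usage; host_input/host_output are plain float buffers.
stream.ThenMemcpy(&device_input, host_input, input_bytes)
    .ThenConvolveQuantized(input_desc, device_input, filter_desc,
                           quantized_filter, coefficient_scales, conv_desc,
                           output_desc, &device_output)
    .ThenMemcpy(host_output, device_output, output_bytes);
if (!stream.ok()) {
  LOG(ERROR) << "convolution stream entered an error state";
}
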
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 711eb3079a..8a8b4b1660 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -245,6 +245,26 @@ class Stream {
const dnn::BatchDescriptor &output_descriptor,
DeviceMemory<float> *output);
+ Stream &ThenConvolveQuantized(
+ const dnn::BatchDescriptor &input_descriptor,
+ const DeviceMemory<float> &input_data,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int8> &filter_coefficients,
+ const DeviceMemory<float> &coefficient_scales,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const dnn::BatchDescriptor &output_descriptor,
+ DeviceMemory<float> *output_data);
+
+ Stream &ThenConvolveQuantized(
+ const dnn::BatchDescriptor &input_descriptor,
+ const DeviceMemory<float> &input_data,
+ const dnn::FilterDescriptor &filter_descriptor,
+ const DeviceMemory<int16> &filter_coefficients,
+ const DeviceMemory<float> &coefficient_scales,
+ const dnn::ConvolutionDescriptor &convolution_descriptor,
+ const dnn::BatchDescriptor &output_descriptor,
+ DeviceMemory<float> *output_data);
+
Stream &ThenConvolveWithScratch(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,