diff options
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.h | 26 | ||||
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 26 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.cc | 60 | ||||
-rw-r--r-- | tensorflow/stream_executor/stream.h | 20 |
4 files changed, 132 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index 8101ebf258..86292bf498 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -164,6 +164,32 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; + bool DoConvolveQuantized( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory<float>& input_data, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int8>& filter_coefficients, + const DeviceMemory<float>& coefficient_scales, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data) override { + LOG(ERROR) << "DoConvolveQuantized not supported by cuDNN"; + return false; + } + + bool DoConvolveQuantized( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory<float>& input_data, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int16>& filter_coefficients, + const DeviceMemory<float>& coefficient_scales, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data) override { + LOG(ERROR) << "DoConvolveQuantized not supported by cuDNN"; + return false; + } + bool DoSeparableConvolve( Stream* stream, const dnn::BatchDescriptor& batch_descriptor, const DeviceMemory<float>& input_data, diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 86e56ef186..517f3ea904 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -969,6 +969,32 @@ class DnnSupport { const dnn::AlgorithmConfig& algorithm_config, ProfileResult* output_profile_result) = 0; + // Version of DoConvolve that uses pre-quantized 8 bit coefficients. 
+ // coefficient_scales specifies the scaling of each column of coefficients: + // original float coefficient[row * num_columns + column] = + // quantized coefficient[row * num_columns + column] * + // coefficient_scales[column]. + virtual bool DoConvolveQuantized( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory<float>& input_data, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int8>& filter_coefficients, + const DeviceMemory<float>& coefficient_scales, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data) = 0; + + // Same as DoConvolveQuantized above, but with int16 filter coefficients. + virtual bool DoConvolveQuantized( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory<float>& input_data, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int16>& filter_coefficients, + const DeviceMemory<float>& coefficient_scales, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data) = 0; + + // Variation of the above with the weight matrix split into two matrices. + // first_weights: Coefficients of the first matrix. + // second_weights: Coefficients of the second matrix. 
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 980d544b01..7712a3697c 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -468,6 +468,66 @@ Stream &Stream::ThenConvolve( output, /*scratch_allocator=*/nullptr); } +Stream &Stream::ThenConvolveQuantized( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory<float> &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_coefficients, + const DeviceMemory<float> &coefficient_scales, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory<float> *output) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), + PARAM(filter_descriptor), PARAM(filter_coefficients), + PARAM(coefficient_scales), PARAM(convolution_descriptor), + PARAM(output_descriptor), PARAM(output)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveQuantized( + this, input_descriptor, input_data, filter_descriptor, + filter_coefficients, coefficient_scales, convolution_descriptor, + output_descriptor, output)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; +} + +Stream &Stream::ThenConvolveQuantized( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory<float> &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int16> &filter_coefficients, + const DeviceMemory<float> &coefficient_scales, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory<float> *output) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), + PARAM(filter_descriptor), PARAM(filter_coefficients), + PARAM(coefficient_scales), PARAM(convolution_descriptor), + PARAM(output_descriptor), PARAM(output)); + + if 
(ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveQuantized( + this, input_descriptor, input_data, filter_descriptor, + filter_coefficients, coefficient_scales, convolution_descriptor, + output_descriptor, output)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; +} + Stream &Stream::ThenSeparableConvolve( const dnn::BatchDescriptor &batch_descriptor, const DeviceMemory<float> &input_data, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index 711eb3079a..8a8b4b1660 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -245,6 +245,26 @@ class Stream { const dnn::BatchDescriptor &output_descriptor, DeviceMemory<float> *output); + Stream &ThenConvolveQuantized( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory<float> &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int8> &filter_coefficients, + const DeviceMemory<float> &coefficient_scales, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory<float> *output_data); + + Stream &ThenConvolveQuantized( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory<float> &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory<int16> &filter_coefficients, + const DeviceMemory<float> &coefficient_scales, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory<float> *output_data); + Stream &ThenConvolveWithScratch( const dnn::BatchDescriptor &input_descriptor, const DeviceMemory<Eigen::half> &input_data, |