diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-01-19 12:47:08 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-01-19 13:08:19 -0800 |
commit | 598583c20dba57e4c99430d345b88dc63bc662f0 (patch) | |
tree | 02940d56fee0e548f81f8861786cd9c45fe97507 /tensorflow/stream_executor/dnn.h | |
parent | 82542b6128c30aecd51d776f2be636993f99bd6a (diff) |
Add convolve quantized ops to StreamExecutor API
Change: 144996696
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 86e56ef186..517f3ea904 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -969,6 +969,32 @@ class DnnSupport { const dnn::AlgorithmConfig& algorithm_config, ProfileResult* output_profile_result) = 0; + // Version of DoConvolve that uses pre-quantized 8 bit coefficients. + // coefficient_scales specifies the scaling of each column of coefficients: + // original float coefficient[row * num_columns + column] = + // quantized coefficient[row * num_columns + column] * + // coefficient_scales[column]. + virtual bool DoConvolveQuantized( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory<float>& input_data, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int8>& filter_coefficients, + const DeviceMemory<float>& coefficient_scales, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data) = 0; + + // Same as DoConvolveQuantized above, but with int16 filter coefficients. + virtual bool DoConvolveQuantized( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory<float>& input_data, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory<int16>& filter_coefficients, + const DeviceMemory<float>& coefficient_scales, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory<float>* output_data) = 0; + // Variation of the above with the weight matrix split into two matrices. // first_weights: Coefficients of the first matrix. // second_weights: Coefficients of the second matrix. |