aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/dnn.h
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-01-19 12:47:08 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-01-19 13:08:19 -0800
commit598583c20dba57e4c99430d345b88dc63bc662f0 (patch)
tree02940d56fee0e548f81f8861786cd9c45fe97507 /tensorflow/stream_executor/dnn.h
parent82542b6128c30aecd51d776f2be636993f99bd6a (diff)
Add convolve quantized ops to StreamExecutor API
Change: 144996696
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r--tensorflow/stream_executor/dnn.h26
1 files changed, 26 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 86e56ef186..517f3ea904 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -969,6 +969,32 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
+ // Version of DoConvolve that uses pre-quantized 8 bit coefficients.
+ // coefficient_scales specifies the scaling of each column of coefficients:
+ // original float coefficient[row * num_columns + column] =
+ // quantized coefficient[row * num_columns + column] *
+ // coefficient_scales[column].
+ virtual bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int8>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) = 0;
+
+ // Same as DoConvolveQuantized above, but int16 filter coefficients.
+ virtual bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int16>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) = 0;
+
// Variation of the above with the weight matrix split into two matrices.
// first_weights: Coefficients of the first matrix.
// second_weights: Coefficients of the second matrix.