aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/dnn.h
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-01-19 12:47:08 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-01-19 13:08:19 -0800
commit598583c20dba57e4c99430d345b88dc63bc662f0 (patch)
tree02940d56fee0e548f81f8861786cd9c45fe97507 /tensorflow/stream_executor/dnn.h
parent82542b6128c30aecd51d776f2be636993f99bd6a (diff)
Add convolve quantized ops to StreamExecutor API
Change: 144996696
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r--tensorflow/stream_executor/dnn.h26
1 files changed, 26 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 86e56ef186..517f3ea904 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -969,6 +969,32 @@ class DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
ProfileResult* output_profile_result) = 0;
+ // Version of DoConvolve that uses pre-quantized 8 bit coefficients.
+ // coefficient_scales specifies the scaling of each column of coefficients:
+ // original float coefficient[row * num_columns + column] =
+ // quantized coefficient[row * num_columns + column] *
+ // coefficient_scales[column].
+ virtual bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int8>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) = 0;
+
+ // Same as DoConvolveQuantized above, but int16 filter coefficients.
+ virtual bool DoConvolveQuantized(
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+ const DeviceMemory<float>& input_data,
+ const dnn::FilterDescriptor& filter_descriptor,
+ const DeviceMemory<int16>& filter_coefficients,
+ const DeviceMemory<float>& coefficient_scales,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data) = 0;
+
// Variation of the above with the weight matrix split into two matrices.
// first_weights: Coefficients of the first matrix.
// second_weights: Coefficients of the second matrix.