author    A. Unique TensorFlower <gardener@tensorflow.org> 2017-07-18 08:47:06 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2017-07-18 08:51:16 -0700
commit    bdb2967a298236e24011405907cd19737386934e (patch)
tree      dcff69670374af7fd3c30cee2161d791153c7654
parent    0a9ce966098fd2e1039daefd2960a9135a567cf9 (diff)
Fix metagemm calls in quantized ops. Only use metagemm multiplication for k <= 2048.

PiperOrigin-RevId: 162356878
-rw-r--r-- tensorflow/core/kernels/quantized_conv_ops.cc   | 2
-rw-r--r-- tensorflow/core/kernels/quantized_matmul_op.cc  | 2
2 files changed, 2 insertions, 2 deletions
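
The change itself is small: both quantized kernels now take the gemmlowp "meta" fast path only when the depth dimension k is at most 2048, and otherwise fall back to the generic reference gemm. The standalone C++ sketch below mirrors that dispatch pattern for illustration only; kMaxMetaDepth, DispatchGemm, RunMetaGemm, and RunReferenceGemm are hypothetical names and are not part of the TensorFlow source.

// Illustrative sketch only -- not the TensorFlow code in this patch.
// It mirrors the guard added by the commit: the fast "meta" gemm is used
// only when all preconditions hold, including the new bound k <= 2048.
#include <cstdio>

constexpr int kMaxMetaDepth = 2048;  // threshold introduced by this commit

// Stand-ins for the two gemm back ends (hypothetical).
void RunMetaGemm(int m, int n, int k) {
  std::printf("meta gemm: m=%d n=%d k=%d\n", m, n, k);
}
void RunReferenceGemm(int m, int n, int k) {
  std::printf("reference gemm: m=%d n=%d k=%d\n", m, n, k);
}

// Chooses a kernel the way the patched ops do: meta gemm only when the
// platform supports it, the output is a plain 32-bit accumulation
// (offset 0, mult 1, shift 0, no transposed output), and k stays in bounds.
void DispatchGemm(bool meta_supported, bool plain_int32_output,
                  bool transpose_c, int m, int n, int k) {
  if (meta_supported && plain_int32_output && !transpose_c &&
      k <= kMaxMetaDepth) {
    RunMetaGemm(m, n, k);
  } else {
    RunReferenceGemm(m, n, k);
  }
}

int main() {
  DispatchGemm(true, true, false, 64, 64, 1024);  // takes the meta path
  DispatchGemm(true, true, false, 64, 64, 4096);  // falls back: k > 2048
  return 0;
}
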
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index 5658dcc069..3b0764bb9b 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -381,7 +381,7 @@ class Im2ColConvFunctor {
if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
std::is_same<T2, quint8>() && std::is_same<T3, qint32>() &&
(output_offset == 0) && (output_mult == 1) && (output_shift == 0) &&
- (transpose_c == false)) {
+ (transpose_c == false) && (k <= 2048)) {
meta::QuantizedGemm(context, transpose_a, transpose_b, im2col_buffer,
filter_data, chunk_output_data, m, n, k,
-input_offset, -filter_offset, lda, ldb, ldc);
diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc
index 86c722e587..afb30d5f62 100644
--- a/tensorflow/core/kernels/quantized_matmul_op.cc
+++ b/tensorflow/core/kernels/quantized_matmul_op.cc
@@ -135,7 +135,7 @@ class QuantizedMatMulOp : public OpKernel {
if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() &&
std::is_same<T2, quint8>() && std::is_same<Toutput, qint32>() &&
(offset_c == 0) && (mult_c == 1) && (shift_c == 0) &&
- (transpose_c == false)) {
+ (transpose_c == false) && (k <= 2048)) {
// Gemmlowp/meta code path works on 32 & 64 bit Arm with NEON Simd and
// allows optimized quantized 8bit to 32bit gemm.
meta::QuantizedGemm(context, transpose_a_, transpose_b_, a_data, b_data,