aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-03-19 19:58:03 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-03-19 20:01:58 -0700
commit: 79d06a6261a523866ace67f7b831d7f617d550e6 (patch)
tree: ea4a4e5cc51f25e577f3a5cd581ebf7d35d65ff1
parent: a2e0f8c24776f63b04a29fad9c66bf3d66e94f4d (diff)
Apply output_min/output_max to the result in the NEON implementation of Add operator.
Both the non-NEON and reference implementations have this, but it's missing from the NEON version. PiperOrigin-RevId: 189682984
-rw-r--r-- tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h 7
1 file changed, 6 insertions, 1 deletion
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index edd65c9170..004433498d 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -1583,6 +1583,8 @@ inline void Add(int left_shift, const uint8* input1_data,
TFLITE_DCHECK_LT(input1_offset, 256);
TFLITE_DCHECK_LT(input2_offset, 256);
#ifdef USE_NEON
+ const auto output_activation_min_vector = vdup_n_u8(output_activation_min);
+ const auto output_activation_max_vector = vdup_n_u8(output_activation_max);
for (; i <= size - 8; i += 8) {
const auto input1_val_original = vld1_u8(input1_data + i);
const auto input2_val_original = vld1_u8(input2_data + i);
@@ -1628,7 +1630,10 @@ inline void Add(int left_shift, const uint8* input1_data,
const auto s2_narrowed = vmovn_s32(s2);
const auto s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed),
vdupq_n_s16(output_offset));
- vst1_u8(output_data + i, vqmovun_s16(s));
+ const auto clamped =
+ vmax_u8(output_activation_min_vector,
+ vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+ vst1_u8(output_data + i, clamped);
}
#endif // NEON