diff options
author | 2018-03-19 19:58:03 -0700 | |
---|---|---|
committer | 2018-03-19 20:01:58 -0700 | |
commit | 79d06a6261a523866ace67f7b831d7f617d550e6 (patch) | |
tree | ea4a4e5cc51f25e577f3a5cd581ebf7d35d65ff1 | |
parent | a2e0f8c24776f63b04a29fad9c66bf3d66e94f4d (diff) |
Apply output_min/output_max to the result in the NEON implementation of the Add operator.
Both the non-NEON and the reference implementations have this clamping, but it was missing from the NEON version.
PiperOrigin-RevId: 189682984
-rw-r--r-- | tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index edd65c9170..004433498d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1583,6 +1583,8 @@ inline void Add(int left_shift, const uint8* input1_data, TFLITE_DCHECK_LT(input1_offset, 256); TFLITE_DCHECK_LT(input2_offset, 256); #ifdef USE_NEON + const auto output_activation_min_vector = vdup_n_u8(output_activation_min); + const auto output_activation_max_vector = vdup_n_u8(output_activation_max); for (; i <= size - 8; i += 8) { const auto input1_val_original = vld1_u8(input1_data + i); const auto input2_val_original = vld1_u8(input2_data + i); @@ -1628,7 +1630,10 @@ inline void Add(int left_shift, const uint8* input1_data, const auto s2_narrowed = vmovn_s32(s2); const auto s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(output_offset)); - vst1_u8(output_data + i, vqmovun_s16(s)); + const auto clamped = + vmax_u8(output_activation_min_vector, + vmin_u8(output_activation_max_vector, vqmovun_s16(s))); + vst1_u8(output_data + i, clamped); } #endif // NEON |