author    A. Unique TensorFlower <gardener@tensorflow.org>	2018-05-23 09:16:52 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>	2018-05-23 09:19:36 -0700
commit    7a82d0fd10901f4b59f38e838a24a04df8305f73 (patch)
tree      ffb2949fa729e90c29f0fe81be57224c00f1abb3 /tensorflow/contrib
parent    d1f44e1c60d38cc36bc438b59338c3a4eecf0615 (diff)
Support batch size > 1 in the 8-bit quantized L2Normalization implementations.
PiperOrigin-RevId: 197736184
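
Before this change, both the optimized and reference 8-bit L2Normalization kernels DCHECKed outer_size == 1, so only a single batch row was handled; the change wraps the per-row computation in a loop over outer_size. For orientation, here is a minimal float-domain sketch of what each row computes (illustrative only; the committed kernels below do this in fixed point via GetInvSqrtQuantizedMultiplier and MultiplyByQuantizedMultiplierSmallerThanOne):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Float-domain sketch of one batch row of quantized L2 normalization.
// The output is quantized with zero point 128 and scale 1/128, matching the
// "128 + rescaled_diff" / clamp-to-[0, 255] pattern in the kernels below.
void L2NormalizeRowSketch(const uint8_t* input, int depth,
                          int32_t input_zero_point, uint8_t* output) {
  double square_l2_norm = 0.0;
  for (int c = 0; c < depth; ++c) {
    const double diff = input[c] - input_zero_point;
    square_l2_norm += diff * diff;
  }
  const double inv_l2norm = 1.0 / std::sqrt(square_l2_norm);
  for (int c = 0; c < depth; ++c) {
    const double diff = input[c] - input_zero_point;
    const int unclamped =
        128 + static_cast<int>(std::lround(128.0 * diff * inv_l2norm));
    output[c] = static_cast<uint8_t>(std::min(255, std::max(0, unclamped)));
  }
}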
Diffstat (limited to 'tensorflow/contrib')
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h  | 39
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h  | 41
-rw-r--r--  tensorflow/contrib/lite/kernels/l2norm_test.cc                       | 30
3 files changed, 71 insertions, 39 deletions
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 6e5ceec85e..1b4660ef4f 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2353,24 +2353,27 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
- TFLITE_DCHECK_EQ(outer_size, 1);
- int32 square_l2_norm = 0;
- for (int i = 0; i < depth; i++) {
- int32 diff = input_data[i] - input_zero_point;
- square_l2_norm += diff * diff;
- }
- int32 inv_l2norm_multiplier;
- int inv_l2norm_shift;
- GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
- &inv_l2norm_shift);
-
- for (int i = 0; i < depth; i++) {
- int32 diff = input_data[i] - input_zero_point;
- int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne(
- 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
- int32 unclamped_output_val = 128 + rescaled_diff;
- int32 output_val = std::min(255, std::max(0, unclamped_output_val));
- output_data[i] = static_cast<uint8>(output_val);
+ for (int i = 0; i < outer_size; ++i) {
+ int32 square_l2_norm = 0;
+ for (int c = 0; c < depth; c++) {
+ int32 diff = input_data[c] - input_zero_point;
+ square_l2_norm += diff * diff;
+ }
+ int32 inv_l2norm_multiplier;
+ int inv_l2norm_shift;
+ GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
+ &inv_l2norm_shift);
+
+ for (int c = 0; c < depth; c++) {
+ int32 diff = *input_data - input_zero_point;
+ int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne(
+ 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+ int32 unclamped_output_val = 128 + rescaled_diff;
+ int32 output_val = std::min(255, std::max(0, unclamped_output_val));
+ *output_data = static_cast<uint8>(output_val);
+ ++input_data;
+ ++output_data;
+ }
}
}
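
Note on the optimized hunk above: the second inner loop advances input_data and output_data one element at a time, so after each outer iteration both pointers have moved past one full row of depth elements, and the next iteration's input_data[c] reads already refer to the next batch row. Because IsPackedWithoutStrides() is DCHECKed for both tensors, this pointer walk is equivalent to plain flat indexing; an index-based rendering of the same loop (illustrative equivalent, not the committed code):

for (int i = 0; i < outer_size; ++i) {
  int32 square_l2_norm = 0;
  for (int c = 0; c < depth; c++) {
    const int32 diff = input_data[i * depth + c] - input_zero_point;
    square_l2_norm += diff * diff;
  }
  int32 inv_l2norm_multiplier;
  int inv_l2norm_shift;
  GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
                                &inv_l2norm_shift);
  for (int c = 0; c < depth; c++) {
    const int32 diff = input_data[i * depth + c] - input_zero_point;
    const int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne(
        128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
    output_data[i * depth + c] = static_cast<uint8>(
        std::min(255, std::max(0, 128 + rescaled_diff)));
  }
}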
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index a56fc0635b..cd4af48bee 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -895,25 +895,28 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
const Dims<4>& output_dims) {
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
- TFLITE_DCHECK_EQ(outer_size, 1);
- int32 square_l2_norm = 0;
- for (int i = 0; i < depth; i++) {
- int32 diff = input_data[Offset(input_dims, i, 0, 0, 0)] - input_zero_point;
- square_l2_norm += diff * diff;
- }
- int32 inv_l2norm_multiplier;
- int inv_l2norm_shift;
- GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
- &inv_l2norm_shift);
-
- for (int i = 0; i < depth; i++) {
- int32 diff = input_data[Offset(input_dims, i, 0, 0, 0)] - input_zero_point;
- int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne(
- 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
- int32 unclamped_output_val = 128 + rescaled_diff;
- int32 output_val = std::min(255, std::max(0, unclamped_output_val));
- output_data[Offset(output_dims, i, 0, 0, 0)] =
- static_cast<uint8>(output_val);
+ for (int i = 0; i < outer_size; ++i) {
+ int32 square_l2_norm = 0;
+ for (int c = 0; c < depth; c++) {
+ int32 diff =
+ input_data[Offset(input_dims, c, i, 0, 0)] - input_zero_point;
+ square_l2_norm += diff * diff;
+ }
+ int32 inv_l2norm_multiplier;
+ int inv_l2norm_shift;
+ GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
+ &inv_l2norm_shift);
+
+ for (int c = 0; c < depth; c++) {
+ int32 diff =
+ input_data[Offset(input_dims, c, i, 0, 0)] - input_zero_point;
+ int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne(
+ 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+ int32 unclamped_output_val = 128 + rescaled_diff;
+ int32 output_val = std::min(255, std::max(0, unclamped_output_val));
+ output_data[Offset(output_dims, c, i, 0, 0)] =
+ static_cast<uint8>(output_val);
+ }
}
}
diff --git a/tensorflow/contrib/lite/kernels/l2norm_test.cc b/tensorflow/contrib/lite/kernels/l2norm_test.cc
index 11cc666bad..070ed60040 100644
--- a/tensorflow/contrib/lite/kernels/l2norm_test.cc
+++ b/tensorflow/contrib/lite/kernels/l2norm_test.cc
@@ -67,7 +67,7 @@ class L2NormOpModel : public SingleOpModel {
int output_;
};
-TEST(L2NormOpTest, SimpleTest) {
+TEST(L2NormOpTest, SimpleFloatTest) {
L2NormOpModel m({1, 1, 1, 6}, TensorType_FLOAT32,
ActivationFunctionType_NONE);
m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
@@ -76,7 +76,7 @@ TEST(L2NormOpTest, SimpleTest) {
ElementsAreArray({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
}
-TEST(L2NormOpTest, MultipleBatchesTest) {
+TEST(L2NormOpTest, MultipleBatchFloatTest) {
L2NormOpModel m({3, 1, 1, 6}, TensorType_FLOAT32,
ActivationFunctionType_NONE);
m.SetInput({
@@ -105,6 +105,32 @@ TEST(L2NormOpTest, SimpleUint8Test) {
ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}, 0.1)));
}
+TEST(L2NormOpTest, MultipleBatchUint8Test) {
+ L2NormOpModel m({3, 1, 1, 6}, TensorType_UINT8, ActivationFunctionType_NONE);
+
+ m.QuantizeAndPopulate<uint8_t>(m.input(),
+ {
+ -1.1, 0.6, 0.7, 1.2, -0.7, 0.1, // batch 1
+ -1.1, 0.6, 0.7, 1.2, -0.7, 0.1, // batch 2
+ -1.1, 0.6, 0.7, 1.2, -0.7, 0.1, // batch 3
+ });
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput<uint8_t>(),
+ ElementsAreArray({
+ 58, 166, 173, 205, 83, 134, // batch 1
+ 58, 166, 173, 205, 83, 134, // batch 2
+ 58, 166, 173, 205, 83, 134, // batch 3
+ }));
+ EXPECT_THAT(m.GetDequantizedOutput(),
+ ElementsAreArray(ArrayFloatNear(
+ {
+ -0.55, 0.3, 0.35, 0.6, -0.35, 0.05, // batch 1
+ -0.55, 0.3, 0.35, 0.6, -0.35, 0.05, // batch 2
+ -0.55, 0.3, 0.35, 0.6, -0.35, 0.05, // batch 3
+ },
+ 0.1)));
+}
+
} // namespace
} // namespace tflite
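
As a rough sanity check on the uint8 expectations in MultipleBatchUint8Test (not part of the test file): with output zero point 128 and scale 1/128, a normalized value x maps to approximately round(128 + 128 * x), so -0.55 -> 58, 0.3 -> 166, 0.35 -> 173, 0.6 -> 205, -0.35 -> 83, and 0.05 -> 134, matching each expected row; small deviations can come from input quantization and the fixed-point inverse square root. A standalone check:

#include <cmath>
#include <cstdio>

int main() {
  const double normalized[] = {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05};
  for (double x : normalized) {
    // Prints 58 166 173 205 83 134, matching the expected uint8 outputs.
    std::printf("%d ", static_cast<int>(std::lround(128.0 + 128.0 * x)));
  }
  std::printf("\n");
  return 0;
}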