author Andrew Selle <aselle@google.com> 2017-11-10 10:35:35 -0800
committer Andrew Selle <aselle@andyselle.com> 2017-11-10 16:14:42 -0800
commit 0b15439f8f0f2d4755587f4096c3ea04cb199d23 (patch)
tree 9aa4fc8162bf9b4ee50112a7b85703f70ca4df08 /tensorflow/contrib/lite/kernels/add.cc
parent 7ac140a5845553275427162aabd9d54987144b4a (diff)
Internal Change.
PiperOrigin-RevId: 175307445
Diffstat (limited to 'tensorflow/contrib/lite/kernels/add.cc')
-rw-r--r-- tensorflow/contrib/lite/kernels/add.cc | 184
1 file changed, 184 insertions(+), 0 deletions(-)
diff --git a/tensorflow/contrib/lite/kernels/add.cc b/tensorflow/contrib/lite/kernels/add.cc
new file mode 100644
index 0000000000..0e10a249ab
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/add.cc
@@ -0,0 +1,184 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace add {
+
+// This file has three implementations of Add: reference, generic optimized,
+// and NEON-optimized.
+enum KernelType {
+ kReference,
+ kGenericOptimized, // Neon-free
+ kNeonOptimized,
+};
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
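+// Prepare checks that both inputs have identical shapes and the same type as
+// the output, then resizes the output tensor to match input1's shape.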
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+ TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+ TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+ TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+ TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+ TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+ TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2));
+ for (int i = 0; i < NumDimensions(input1); ++i) {
+ TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i),
+ SizeOfDimension(input2, i));
+ }
+
+ TF_LITE_ENSURE_EQ(context, input1->type, output->type);
+ TF_LITE_ENSURE_EQ(context, input2->type, output->type);
+
+ TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims);
+ return context->ResizeTensor(context, output, output_size);
+}
+
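+// Float path: compute the clamp range implied by the fused activation
+// (e.g. kTfLiteActRelu clamps the minimum at 0), then dispatch to the
+// reference or optimized element-wise Add with that range.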
+template <KernelType kernel_type>
+void EvalAddFloat(TfLiteContext* context, TfLiteNode* node,
+ TfLiteAddParams* params, TfLiteTensor* input1,
+ TfLiteTensor* input2, TfLiteTensor* output) {
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(params->activation, &output_activation_min,
+ &output_activation_max);
+#define TF_LITE_ADD(type) \
+ type::Add(GetTensorData<float>(input1), GetTensorDims(input1), \
+ GetTensorData<float>(input2), GetTensorDims(input2), \
+ output_activation_min, output_activation_max, \
+ GetTensorData<float>(output), GetTensorDims(output))
+ if (kernel_type == kReference) {
+ TF_LITE_ADD(reference_ops);
+ } else {
+ TF_LITE_ADD(optimized_ops);
+ }
+#undef TF_LITE_ADD
+}
+
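+// Quantized (uint8) path. Each quantized value q represents the real value
+// scale * (q - zero_point), so the offsets below are negated zero points.
+// Both inputs are rescaled to a common scale (twice the larger input scale)
+// using fixed-point multipliers, summed, then rescaled to the output scale
+// and shifted by the output zero point.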
+template <KernelType kernel_type>
+void EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
+ TfLiteAddParams* params, TfLiteTensor* input1,
+ TfLiteTensor* input2, TfLiteTensor* output) {
+ auto input1_offset = -input1->params.zero_point;
+ auto input2_offset = -input2->params.zero_point;
+ auto output_offset = output->params.zero_point;
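+  // Inputs are scaled up by 2^left_shift before the fixed-point rescaling so
+  // precision is not lost; the output multiplier divides the same factor back
+  // out (note the (1 << left_shift) in its denominator).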
+ const int left_shift = 20;
+ const double twice_max_input_scale =
+ 2 * std::max(input1->params.scale, input2->params.scale);
+ const double real_input1_multiplier =
+ input1->params.scale / twice_max_input_scale;
+ const double real_input2_multiplier =
+ input2->params.scale / twice_max_input_scale;
+ const double real_output_multiplier =
+ twice_max_input_scale / ((1 << left_shift) * output->params.scale);
+
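+  // Each real multiplier is in (0, 1), so it can be decomposed into a 32-bit
+  // fixed-point multiplier and a right shift.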
+ int32 input1_multiplier;
+ int input1_shift;
+ QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier,
+ &input1_shift);
+ int32 input2_multiplier;
+ int input2_shift;
+ QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier,
+ &input2_shift);
+ int32 output_multiplier;
+ int output_shift;
+ QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier,
+ &output_shift);
+
+ int32 output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(params->activation, output,
+ &output_activation_min, &output_activation_max);
+
+#define TF_LITE_ADD(type) \
+ type::BroadcastAdd( \
+ left_shift, GetTensorData<uint8_t>(input1), GetTensorDims(input1), \
+ input1_offset, input1_multiplier, input1_shift, \
+ GetTensorData<uint8_t>(input2), GetTensorDims(input2), input2_offset, \
+ input2_multiplier, input2_shift, output_offset, output_multiplier, \
+ output_shift, output_activation_min, output_activation_max, \
+ GetTensorData<uint8_t>(output), GetTensorDims(output));
+
+ if (kernel_type == kReference) {
+ TF_LITE_ADD(reference_ops);
+ } else {
+ TF_LITE_ADD(optimized_ops);
+ }
+#undef TF_LITE_ADD
+}
+
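+// Eval dispatches on the output type: float32 uses the float path and uint8
+// uses the quantized path; any other type is an error.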
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+ TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+ TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+ TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+ if (output->type == kTfLiteFloat32) {
+ EvalAddFloat<kernel_type>(context, node, params, input1, input2, output);
+ } else if (output->type == kTfLiteUInt8) {
+ EvalAddQuantized<kernel_type>(context, node, params, input1, input2,
+ output);
+ } else {
+    context->ReportError(context,
+                         "Inputs and outputs not all float|uint8 types.");
+ return kTfLiteError;
+ }
+
+ return kTfLiteOk;
+}
+
+} // namespace add
+
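+// Each registration fills a TfLiteRegistration {init, free, prepare, invoke};
+// Add keeps no per-node state, so init and free are null.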
+TfLiteRegistration* Register_ADD_REF() {
+ static TfLiteRegistration r = {nullptr, nullptr, add::Prepare,
+ add::Eval<add::kReference>};
+ return &r;
+}
+
+TfLiteRegistration* Register_ADD_GENERIC_OPT() {
+ static TfLiteRegistration r = {nullptr, nullptr, add::Prepare,
+ add::Eval<add::kGenericOptimized>};
+ return &r;
+}
+
+TfLiteRegistration* Register_ADD_NEON_OPT() {
+ static TfLiteRegistration r = {nullptr, nullptr, add::Prepare,
+ add::Eval<add::kNeonOptimized>};
+ return &r;
+}
+
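+// The default Add kernel is chosen at compile time: NEON when available,
+// otherwise the generic optimized version.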
+TfLiteRegistration* Register_ADD() {
+#ifdef USE_NEON
+ return Register_ADD_NEON_OPT();
+#else
+ return Register_ADD_GENERIC_OPT();
+#endif
+}
+
+} // namespace builtin
+} // namespace ops
+} // namespace tflite