diff options
author | 2017-11-10 10:35:35 -0800 | |
---|---|---|
committer | 2017-11-10 16:14:42 -0800 | |
commit | 0b15439f8f0f2d4755587f4096c3ea04cb199d23 (patch) | |
tree | 9aa4fc8162bf9b4ee50112a7b85703f70ca4df08 /tensorflow/contrib/lite/kernels/add.cc | |
parent | 7ac140a5845553275427162aabd9d54987144b4a (diff) |
Internal Change.
PiperOrigin-RevId: 175307445
Diffstat (limited to 'tensorflow/contrib/lite/kernels/add.cc')
-rw-r--r-- | tensorflow/contrib/lite/kernels/add.cc | 184 |
1 file changed, 184 insertions, 0 deletions
diff --git a/tensorflow/contrib/lite/kernels/add.cc b/tensorflow/contrib/lite/kernels/add.cc new file mode 100644 index 0000000000..0e10a249ab --- /dev/null +++ b/tensorflow/contrib/lite/kernels/add.cc @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace add { + +// This file has three implementation of Add. 
+enum KernelType { + kReference, + kGenericOptimized, // Neon-free + kNeonOptimized, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); + for (int i = 0; i < NumDimensions(input1); ++i) { + TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), + SizeOfDimension(input2, i)); + } + + TF_LITE_ENSURE_EQ(context, input1->type, output->type); + TF_LITE_ENSURE_EQ(context, input2->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); + return context->ResizeTensor(context, output, output_size); +} + +template <KernelType kernel_type> +void EvalAddFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(params->activation, &output_activation_min, + &output_activation_max); +#define TF_LITE_ADD(type) \ + type::Add(GetTensorData<float>(input1), GetTensorDims(input1), \ + GetTensorData<float>(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData<float>(output), GetTensorDims(output)) + if (kernel_type == kReference) { + TF_LITE_ADD(reference_ops); + } else { + TF_LITE_ADD(optimized_ops); + } +#undef TF_LITE_ADD +} + +template <KernelType kernel_type> +void EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output) { + auto input1_offset = 
-input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + const int left_shift = 20; + const double twice_max_input_scale = + 2 * std::max(input1->params.scale, input2->params.scale); + const double real_input1_multiplier = + input1->params.scale / twice_max_input_scale; + const double real_input2_multiplier = + input2->params.scale / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << left_shift) * output->params.scale); + + int32 input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, + &input1_shift); + int32 input2_multiplier; + int input2_shift; + QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, + &input2_shift); + int32 output_multiplier; + int output_shift; + QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_ADD(type) \ + type::BroadcastAdd( \ + left_shift, GetTensorData<uint8_t>(input1), GetTensorDims(input1), \ + input1_offset, input1_multiplier, input1_shift, \ + GetTensorData<uint8_t>(input2), GetTensorDims(input2), input2_offset, \ + input2_multiplier, input2_shift, output_offset, output_multiplier, \ + output_shift, output_activation_min, output_activation_max, \ + GetTensorData<uint8_t>(output), GetTensorDims(output)); + + if (kernel_type == kReference) { + TF_LITE_ADD(reference_ops); + } else { + TF_LITE_ADD(optimized_ops); + } +#undef TF_LITE_ADD +} + +template <KernelType kernel_type> +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data); + + TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TfLiteTensor* input2 = 
GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalAddFloat<kernel_type>(context, node, params, input1, input2, output); + } else if (output->type == kTfLiteUInt8) { + EvalAddQuantized<kernel_type>(context, node, params, input1, input2, + output); + } else { + context->ReportError(context, + "Inputs and outputs not all float|unit8 types."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace add + +TfLiteRegistration* Register_ADD_REF() { + static TfLiteRegistration r = {nullptr, nullptr, add::Prepare, + add::Eval<add::kReference>}; + return &r; +} + +TfLiteRegistration* Register_ADD_GENERIC_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, add::Prepare, + add::Eval<add::kGenericOptimized>}; + return &r; +} + +TfLiteRegistration* Register_ADD_NEON_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, add::Prepare, + add::Eval<add::kNeonOptimized>}; + return &r; +} + +TfLiteRegistration* Register_ADD() { +#ifdef USE_NEON + return Register_ADD_NEON_OPT(); +#else + return Register_ADD_GENERIC_OPT(); +#endif +} + +} // namespace builtin +} // namespace ops +} // namespace tflite |