Diffstat (limited to 'tensorflow/contrib/lite/kernels/depthwise_conv.cc')
-rw-r--r--  tensorflow/contrib/lite/kernels/depthwise_conv.cc  289
1 file changed, 289 insertions(+), 0 deletions(-)
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
new file mode 100644
index 0000000000..15dbfe08c8
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -0,0 +1,289 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <unistd.h>
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+#include "tensorflow/contrib/lite/kernels/padding.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace depthwise_conv {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+// This file has three implementations of DepthwiseConv.
+enum KernelType {
+ kReference,
+ kGenericOptimized, // Neon-free
+ kNeonOptimized,
+};
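+// As written, EvalFloat/EvalQuantized below only distinguish kReference from
+// the optimized kernels; the generic/NEON split is resolved at registration
+// time via USE_NEON in Register_DEPTHWISE_CONV_2D at the bottom of this file.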
+
+struct OpData {
+ TfLitePaddingValues padding;
+ // The scaling factor from input to output (aka the 'real multiplier') can
+ // be represented as a fixed point multiplier plus a left shift.
+ int32_t output_multiplier;
+ int output_shift;
+ // The range of the fused activation layer. For example, for kNone and
+ // uint8_t these would be 0 and 255.
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+ // This is a builtin op, so we don't use the contents in 'buffer', if any.
+ // Instead, we allocate a new object to carry information from Prepare() to
+ // Eval().
+ return new OpData;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+ delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+ auto* params =
+ reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+ OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+ // TODO(ahentz): we could use GetOptionalInputTensor() here, but we need to
+ // decide whether we are OK with optional tensors being completely absent, as
+ // opposed to having -1 as their index.
+ bool hasBias = NumInputs(node) == 3;
+
+ TF_LITE_ENSURE(context, hasBias || NumInputs(node) == 2);
+ TfLiteTensor* input = GetInput(context, node, kInputTensor);
+ TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+ TfLiteTensor* bias = nullptr;
+
+ TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+ TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+ TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+ TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);
+
+ // The parameter 'depth_multiplier' is redundant, so we check here to make
+ // sure it is consistent with the given dimensions.
+ TF_LITE_ENSURE_EQ(context,
+ params->depth_multiplier * SizeOfDimension(input, 3),
+ SizeOfDimension(filter, 3));
+
+ const TfLiteType data_type = input->type;
+ TF_LITE_ENSURE(context,
+ data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8);
+ TF_LITE_ENSURE_EQ(context, output->type, data_type);
+ TF_LITE_ENSURE_EQ(context, filter->type, data_type);
+
+ if (hasBias) {
+ bias = GetInput(context, node, kBiasTensor);
+ if (data_type == kTfLiteUInt8) {
+ TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
+ TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
+ } else {
+ TF_LITE_ENSURE_EQ(context, bias->type, data_type);
+ }
+ TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
+ TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
+ SizeOfDimension(bias, 0));
+ }
+
+ int channels_out = SizeOfDimension(filter, 3);
+ int width = SizeOfDimension(input, 2);
+ int height = SizeOfDimension(input, 1);
+ int filter_width = SizeOfDimension(filter, 2);
+ int filter_height = SizeOfDimension(filter, 1);
+ int batches = SizeOfDimension(input, 0);
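+ // As read above, the input uses NHWC layout ([batches, height, width,
+ // channels]); the filter's spatial dimensions are in positions 1 and 2, and
+ // its last dimension holds channels_out = channels * depth_multiplier.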
+
+ // Matching GetWindowedOutputSize in TensorFlow.
+ auto padding = params->padding;
+ auto compute_out_size = [padding](int imageSize, int filterSize,
+ int stride) -> int {
+ return padding == kTfLitePaddingSame
+ ? (imageSize + stride - 1) / stride
+ : padding == kTfLitePaddingValid
+ ? (imageSize - filterSize + stride) / stride
+ : 0;
+ };
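+ // For example, with stride 2 and a 3x3 filter over a 5x5 input this gives
+ // ceil(5 / 2) = 3 for kTfLitePaddingSame and (5 - 3 + 2) / 2 = 2 for
+ // kTfLitePaddingValid.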
+
+ int out_width = compute_out_size(width, filter_width, params->stride_width);
+ int out_height =
+ compute_out_size(height, filter_height, params->stride_height);
+
+ data->padding.height =
+ ComputePadding(params->stride_height, height, filter_height, out_height);
+ data->padding.width =
+ ComputePadding(params->stride_width, width, filter_width, out_width);
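+ // ComputePadding (from kernels/padding.h) returns the leading (top/left)
+ // padding, roughly max(0, ((out_size - 1) * stride + filter_size - in_size) / 2).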
+
+ // Note that quantized inference requires that all tensors have their
+ // parameters set. This is usually done during quantized training.
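+ // The 'real multiplier' below works out to input_scale * filter_scale /
+ // output_scale; QuantizeMultiplierSmallerThanOne then folds it into the
+ // integer multiplier and shift stored in OpData.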
+ if (data_type != kTfLiteFloat32) {
+ double real_multiplier = 0.0;
+ TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
+ context, input, filter, bias, output, &real_multiplier));
+ QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier,
+ &data->output_shift);
+ CalculateActivationRangeUint8(params->activation, output,
+ &data->output_activation_min,
+ &data->output_activation_max);
+ }
+
+ TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
+ outputSize->data[0] = batches;
+ outputSize->data[1] = out_height;
+ outputSize->data[2] = out_width;
+ outputSize->data[3] = channels_out;
+ return context->ResizeTensor(context, output, outputSize);
+}
+
+template <KernelType kernel_type>
+void EvalFloat(TfLiteContext* context, TfLiteNode* node,
+ TfLiteDepthwiseConvParams* params, OpData* data,
+ TfLiteTensor* input, TfLiteTensor* filter, TfLiteTensor* bias,
+ TfLiteTensor* output) {
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(params->activation, &output_activation_min,
+ &output_activation_max);
+
+ void (*depthwise_conv)(const float*, const Dims<4>&, const float*,
+ const Dims<4>&, const float*, const Dims<4>&, int, int,
+ int, int, int, float, float, float*, const Dims<4>&);
+ if (kernel_type == kReference) {
+ depthwise_conv = &reference_ops::DepthwiseConv;
+ } else {
+ depthwise_conv = &optimized_ops::DepthwiseConv;
+ }
+
+ depthwise_conv(
+ GetTensorData<float>(input), GetTensorDims(input),
+ GetTensorData<float>(filter), GetTensorDims(filter),
+ GetTensorData<float>(bias), GetTensorDims(bias), params->stride_width,
+ params->stride_height, data->padding.width, data->padding.height,
+ params->depth_multiplier, output_activation_min, output_activation_max,
+ GetTensorData<float>(output), GetTensorDims(output));
+}
+
+template <KernelType kernel_type>
+void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+ TfLiteDepthwiseConvParams* params, OpData* data,
+ TfLiteTensor* input, TfLiteTensor* filter,
+ TfLiteTensor* bias, TfLiteTensor* output) {
+ auto input_offset = -input->params.zero_point;
+ auto filter_offset = -filter->params.zero_point;
+ auto output_offset = output->params.zero_point;
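+ // The input and filter zero points are negated so the kernels can simply add
+ // these offsets to the raw uint8 values to obtain zero-centered operands.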
+
+ void (*depthwise_conv)(const uint8*, const Dims<4>&, int32, const uint8*,
+ const Dims<4>&, int32, const int32*, const Dims<4>&,
+ int, int, int, int, int, int32, int32, int, int32,
+ int32, uint8*, const Dims<4>&);
+ if (kernel_type == kReference) {
+ depthwise_conv = &reference_ops::DepthwiseConv;
+ } else {
+ depthwise_conv = &optimized_ops::DepthwiseConv;
+ }
+
+ depthwise_conv(
+ GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,
+ GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset,
+ GetTensorData<int32_t>(bias), GetTensorDims(bias), params->stride_width,
+ params->stride_height, data->padding.width, data->padding.height,
+ params->depth_multiplier, output_offset, data->output_multiplier,
+ data->output_shift, data->output_activation_min,
+ data->output_activation_max, GetTensorData<uint8_t>(output),
+ GetTensorDims(output));
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ auto* params =
+ reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+ OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+ TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+ TfLiteTensor* input = GetInput(context, node, kInputTensor);
+ TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+ TfLiteTensor* bias =
+ (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
+
+ // TODO(aselle): Consider whether float conv and quantized conv should be
+ // separate ops to avoid dispatch overhead here.
+ switch (input->type) { // Already know in/out types are same.
+ case kTfLiteFloat32:
+ EvalFloat<kernel_type>(context, node, params, data, input, filter, bias,
+ output);
+ break;
+ case kTfLiteUInt8:
+ EvalQuantized<kernel_type>(context, node, params, data, input, filter,
+ bias, output);
+ break;
+ default:
+ context->ReportError(context, "Type not currently supported.");
+ return kTfLiteError;
+ }
+ return kTfLiteOk;
+}
+
+} // namespace depthwise_conv
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
+ static TfLiteRegistration r = {
+ depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
+ depthwise_conv::Eval<depthwise_conv::kReference>};
+ return &r;
+}
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
+ static TfLiteRegistration r = {
+ depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
+ depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
+ return &r;
+}
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
+ static TfLiteRegistration r = {
+ depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
+ depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
+ return &r;
+}
+
+TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
+#ifdef USE_NEON
+ return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
+#else
+ return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
+#endif
+}
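+
+// A minimal usage sketch (assuming, as is typical for builtin kernels, that
+// the BuiltinOpResolver in kernels/register.h maps
+// BuiltinOperator_DEPTHWISE_CONV_2D to Register_DEPTHWISE_CONV_2D; the file
+// name below is hypothetical):
+//
+//   auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
+//   tflite::ops::builtin::BuiltinOpResolver resolver;
+//   std::unique_ptr<tflite::Interpreter> interpreter;
+//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+//   interpreter->AllocateTensors();
+//   // ... write the input tensor, then:
+//   interpreter->Invoke();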
+
+} // namespace builtin
+} // namespace ops
+} // namespace tflite