Diffstat (limited to 'tensorflow/contrib/lite/kernels/depthwise_conv.cc')
-rw-r--r-- | tensorflow/contrib/lite/kernels/depthwise_conv.cc | 289 |
1 files changed, 289 insertions, 0 deletions
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
new file mode 100644
index 0000000000..15dbfe08c8
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -0,0 +1,289 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <unistd.h>
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+#include "tensorflow/contrib/lite/kernels/padding.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace depthwise_conv {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+// This file has three implementations of DepthwiseConv.
+enum KernelType {
+  kReference,
+  kGenericOptimized,  // Neon-free
+  kNeonOptimized,
+};
+
+struct OpData {
+  TfLitePaddingValues padding;
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+  // The range of the fused activation layer. For example, for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  // This is a builtin op, so we don't use the contents in 'buffer', if any.
+  // Instead, we allocate a new object to carry information from Prepare() to
+  // Eval().
+  return new OpData;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+  // TODO(ahentz): we could use GetOptionalInputTensor() here, but we need to
+  // decide whether we are OK with optional tensors being completely absent,
+  // as opposed to having -1 as their index.
+  bool hasBias = NumInputs(node) == 3;
+
+  TF_LITE_ENSURE(context, hasBias || NumInputs(node) == 2);
+  TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  TfLiteTensor* bias = nullptr;
+
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);
+
+  // The parameter 'depth_multiplier' is redundant, so we check here to make
+  // sure it is consistent with the given dimensions.
+  TF_LITE_ENSURE_EQ(context,
+                    params->depth_multiplier * SizeOfDimension(input, 3),
+                    SizeOfDimension(filter, 3));
+
+  const TfLiteType data_type = input->type;
+  TF_LITE_ENSURE(context,
+                 data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8);
+  TF_LITE_ENSURE_EQ(context, output->type, data_type);
+  TF_LITE_ENSURE_EQ(context, filter->type, data_type);
+
+  if (hasBias) {
+    bias = GetInput(context, node, kBiasTensor);
+    if (data_type == kTfLiteUInt8) {
+      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
+      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
+    } else {
+      TF_LITE_ENSURE_EQ(context, bias->type, data_type);
+    }
+    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
+    TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
+                      SizeOfDimension(bias, 0));
+  }
+
+  int channels_out = SizeOfDimension(filter, 3);
+  int width = SizeOfDimension(input, 2);
+  int height = SizeOfDimension(input, 1);
+  int filter_width = SizeOfDimension(filter, 2);
+  int filter_height = SizeOfDimension(filter, 1);
+  int batches = SizeOfDimension(input, 0);
+
+  // Matching GetWindowedOutputSize in TensorFlow.
+  auto padding = params->padding;
+  auto compute_out_size = [padding](int imageSize, int filterSize,
+                                    int stride) -> int {
+    return padding == kTfLitePaddingSame
+               ? (imageSize + stride - 1) / stride
+               : padding == kTfLitePaddingValid
+                     ? (imageSize - filterSize + stride) / stride
+                     : 0;
+  };
+
+  int out_width = compute_out_size(width, filter_width, params->stride_width);
+  int out_height =
+      compute_out_size(height, filter_height, params->stride_height);
+
+  data->padding.height =
+      ComputePadding(params->stride_height, height, filter_height, out_height);
+  data->padding.width =
+      ComputePadding(params->stride_width, width, filter_width, out_width);
+
+  // Note that quantized inference requires that all tensors have their
+  // parameters set. This is usually done during quantized training.
+  if (data_type != kTfLiteFloat32) {
+    double real_multiplier = 0.0;
+    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
+        context, input, filter, bias, output, &real_multiplier));
+    QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier,
+                                     &data->output_shift);
+    CalculateActivationRangeUint8(params->activation, output,
+                                  &data->output_activation_min,
+                                  &data->output_activation_max);
+  }
+
+  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
+  outputSize->data[0] = batches;
+  outputSize->data[1] = out_height;
+  outputSize->data[2] = out_width;
+  outputSize->data[3] = channels_out;
+  return context->ResizeTensor(context, output, outputSize);
+}
+
+template <KernelType kernel_type>
+void EvalFloat(TfLiteContext* context, TfLiteNode* node,
+               TfLiteDepthwiseConvParams* params, OpData* data,
+               TfLiteTensor* input, TfLiteTensor* filter, TfLiteTensor* bias,
+               TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRangeFloat(params->activation, &output_activation_min,
+                                &output_activation_max);
+
+  void (*depthwise_conv)(const float*, const Dims<4>&, const float*,
+                         const Dims<4>&, const float*, const Dims<4>&, int, int,
+                         int, int, int, float, float, float*, const Dims<4>&);
+  if (kernel_type == kReference) {
+    depthwise_conv = &reference_ops::DepthwiseConv;
+  } else {
+    depthwise_conv = &optimized_ops::DepthwiseConv;
+  }
+
+  depthwise_conv(
+      GetTensorData<float>(input), GetTensorDims(input),
+      GetTensorData<float>(filter), GetTensorDims(filter),
+      GetTensorData<float>(bias), GetTensorDims(bias), params->stride_width,
+      params->stride_height, data->padding.width, data->padding.height,
+      params->depth_multiplier, output_activation_min, output_activation_max,
+      GetTensorData<float>(output), GetTensorDims(output));
+}
+
+template <KernelType kernel_type>
+void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                   TfLiteDepthwiseConvParams* params, OpData* data,
+                   TfLiteTensor* input, TfLiteTensor* filter,
+                   TfLiteTensor* bias, TfLiteTensor* output) {
+  auto input_offset = -input->params.zero_point;
+  auto filter_offset = -filter->params.zero_point;
+  auto output_offset = output->params.zero_point;
+
+  void (*depthwise_conv)(const uint8*, const Dims<4>&, int32, const uint8*,
+                         const Dims<4>&, int32, const int32*, const Dims<4>&,
+                         int, int, int, int, int, int32, int32, int, int32,
+                         int32, uint8*, const Dims<4>&);
+  if (kernel_type == kReference) {
+    depthwise_conv = &reference_ops::DepthwiseConv;
+  } else {
+    depthwise_conv = &optimized_ops::DepthwiseConv;
+  }
+
+  depthwise_conv(
+      GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,
+      GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset,
+      GetTensorData<int32_t>(bias), GetTensorDims(bias), params->stride_width,
+      params->stride_height, data->padding.width, data->padding.height,
+      params->depth_multiplier, output_offset, data->output_multiplier,
+      data->output_shift, data->output_activation_min,
+      data->output_activation_max, GetTensorData<uint8_t>(output),
+      GetTensorDims(output));
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  TfLiteTensor* bias =
+      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
+
+  // TODO(aselle): Consider whether float conv and quantized conv should be
+  // separate ops to avoid dispatch overhead here.
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      EvalFloat<kernel_type>(context, node, params, data, input, filter, bias,
+                             output);
+      break;
+    case kTfLiteUInt8:
+      EvalQuantized<kernel_type>(context, node, params, data, input, filter,
+                                 bias, output);
+      break;
+    default:
+      context->ReportError(context, "Type not currently supported.");
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace depthwise_conv
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
+  static TfLiteRegistration r = {
+      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
+      depthwise_conv::Eval<depthwise_conv::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
+  static TfLiteRegistration r = {
+      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
+      depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
+  static TfLiteRegistration r = {
+      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
+      depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
+#ifdef USE_NEON
+  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
+#else
+  return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
+#endif
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
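
Aside (not part of the diff above): the compute_out_size lambda in Prepare() follows the SAME/VALID convention of TensorFlow's GetWindowedOutputSize. A minimal standalone sketch of the same arithmetic with a worked example is given below; the ComputeOutSize helper and Padding enum are illustrative names introduced here, not TFLite APIs.

// Standalone sketch of the SAME/VALID output-size arithmetic used above.
#include <cstdio>

enum class Padding { kSame, kValid };

// SAME padding covers every input position, rounding up by the stride;
// VALID padding counts only fully overlapping filter placements.
int ComputeOutSize(Padding padding, int image_size, int filter_size,
                   int stride) {
  switch (padding) {
    case Padding::kSame:
      return (image_size + stride - 1) / stride;
    case Padding::kValid:
      return (image_size - filter_size + stride) / stride;
  }
  return 0;
}

int main() {
  // Example: a 224-wide input with a 3-wide filter and stride 2 yields
  // 112 output columns with SAME padding and 111 with VALID padding.
  std::printf("SAME:  %d\n", ComputeOutSize(Padding::kSame, 224, 3, 2));   // 112
  std::printf("VALID: %d\n", ComputeOutSize(Padding::kValid, 224, 3, 2));  // 111
  return 0;
}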