diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-09-26 13:42:36 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-26 13:47:11 -0700 |
commit | 652ce1aaefdadd04a9905a0788ab26c6fff93658 (patch) | |
tree | 58a766a41069b3985de75ab092d3672d4d737a4d /tensorflow/contrib/lite/kernels | |
parent | d600b1b55fa851648918fed7a67f61eefd554034 (diff) |
Kernel signature reworking, misc kernel improvements and migrations.
PiperOrigin-RevId: 214661332
Diffstat (limited to 'tensorflow/contrib/lite/kernels')
6 files changed, 139 insertions, 55 deletions
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 0999738396..732880d9da 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -59,6 +59,7 @@ using reference_ops::BroadcastLessEqual; using reference_ops::BroadcastMul4DSlow; using reference_ops::BroadcastSub4DSlow; using reference_ops::Concatenation; +using reference_ops::ConcatenationWithScaling; using reference_ops::DepthConcatenation; using reference_ops::Dequantize; using reference_ops::Div; diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 7a5535489a..cd9e1b255d 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1800,7 +1800,6 @@ inline void Concatenation(int concat_dim, const Scalar* const* input_data, // quantized as it takes scale as a floating point value. This should be fixed // when optimizng this routine further. -// template <> inline void ConcatenationWithScaling(const ConcatenationParams& params, const RuntimeShape* const* input_shapes, const uint8* const* input_data, @@ -1813,15 +1812,13 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params, const int32 output_zeropoint = params.output_zeropoint; const float output_scale = params.output_scale; - // The arguments input_zeropoint and input_scale are expected to be an array - // that have the quantization parameters for all the inputs to the concat - // operator. 
- TFLITE_DCHECK_GT(inputs_count, 1); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int concat_dimensions = output_shape.DimensionsCount(); + TFLITE_DCHECK_LT(axis, concat_dimensions); + int64_t concat_size = 0; for (int i = 0; i < inputs_count; i++) { - TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), 4); - for (int j = 0; j < 4; j++) { + TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions); + for (int j = 0; j < concat_dimensions; j++) { if (j != axis) { MatchingDim(*input_shapes[i], j, output_shape, j); } @@ -1836,9 +1833,10 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params, // For all input arrays, // FlatSize() = outer_size * Dims(axis) * base_inner_size; int64_t base_inner_size = 1; - for (int i = axis + 1; i < 4; ++i) { + for (int i = axis + 1; i < concat_dimensions; ++i) { base_inner_size *= output_shape.Dims(i); } + const float inverse_output_scale = 1.f / output_scale; uint8* output_ptr = output_data; for (int k = 0; k < outer_size; k++) { @@ -1892,37 +1890,51 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data, } template <typename Scalar> -void Pack(int dim, const Scalar* const* input_data, - const Dims<4>* const* input_dims, int inputs_count, - Scalar* output_data, const Dims<4>& output_dims) { - TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); +void Pack(const PackParams& params, const RuntimeShape* const* input_shapes, + const Scalar* const* input_data, const RuntimeShape& output_shape, + Scalar* output_data) { + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + int inputs_count = params.inputs_count; + int outer_size = 1; - for (int i = dim + 1; i < 4; i++) { - outer_size *= output_dims.sizes[i]; + for (int i = 0; i < axis; i++) { + outer_size *= output_shape.Dims(i); } - Scalar* output_ptr = output_data; - const int copy_size = FlatSize(**input_dims) / outer_size; - for (int k = 0; k < outer_size; k++) { - for (int i = 0; i < 
inputs_count; ++i) { - memcpy(output_ptr, input_data[i] + k * copy_size, - copy_size * sizeof(Scalar)); - output_ptr += copy_size; + int copy_size = 1; + for (int i = params.axis + 1; i < dimensions; i++) { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + for (int i = 0; i < inputs_count; ++i) { + for (int k = 0; k < outer_size; k++) { + const Scalar* input_ptr = input_data[i] + copy_size * k; + int loc = k * inputs_count * copy_size + i * copy_size; + memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar)); } } } template <typename Scalar> -void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims, - int dimensions, int outputs_count, Scalar* const* output_datas, - const Dims<4>& output_dims) { +void Unpack(const UnpackParams& params, const RuntimeShape& input_shape, + const Scalar* input_data, const RuntimeShape& output_shape, + Scalar* const* output_datas) { + const int dimensions = input_shape.DimensionsCount(); + const int outputs_count = params.num_split; + int outer_size = 1; - for (int i = dimensions - axis; i < 4; i++) { - outer_size *= input_dims.sizes[i]; + for (int i = 0; i < params.axis; i++) { + outer_size *= input_shape.Dims(i); + } + int copy_size = 1; + for (int i = params.axis + 1; i < dimensions; i++) { + copy_size *= input_shape.Dims(i); } + TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size); - const int copy_size = FlatSize(input_dims) / outer_size / outputs_count; - for (int k = 0; k < outer_size; k++) { - for (int i = 0; i < outputs_count; ++i) { + for (int i = 0; i < outputs_count; ++i) { + for (int k = 0; k < outer_size; k++) { Scalar* output_ptr = output_datas[i] + copy_size * k; int loc = k * outputs_count * copy_size + i * copy_size; memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); @@ -1931,18 +1943,29 @@ void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims, } template <typename Scalar> -void 
Pack(int dim, const Scalar* const* input_data, - const Dims<4>* const* input_dims, const int32* input_zeropoint, - const float* input_scale, int inputs_count, Scalar* output_data, - const Dims<4>& output_dims, const int32 output_zeropoint, - const float output_scale) { - TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); +void PackWithScaling(const PackParams& params, + const RuntimeShape* const* input_shapes, + const uint8* const* input_data, + const RuntimeShape& output_shape, uint8* output_data) { + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + const int32* input_zeropoint = params.input_zeropoint; + const float* input_scale = params.input_scale; + int inputs_count = params.inputs_count; + const int32 output_zeropoint = params.output_zeropoint; + const float output_scale = params.output_scale; + int outer_size = 1; - for (int i = dim + 1; i < 4; i++) { - outer_size *= output_dims.sizes[i]; + for (int i = 0; i < axis; i++) { + outer_size *= output_shape.Dims(i); } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; i++) { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + Scalar* output_ptr = output_data; - const int copy_size = FlatSize(**input_dims) / outer_size; const float inverse_output_scale = 1.f / output_scale; for (int k = 0; k < outer_size; k++) { for (int i = 0; i < inputs_count; ++i) { @@ -3374,15 +3397,21 @@ inline void Floor(const RuntimeShape& input_shape, const float* input_data, template <typename T> inline void Gather(const tflite::GatherParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const RuntimeShape& coords_shape, const int32* coords_data, - const RuntimeShape& output_shape, T* output_data) { - // Enable these checks when moving legacy ops to legacy_reference_ops. 
- // - // TFLITE_DCHECK_EQ(coords_shape.DimensionsCount(), 1); + const RuntimeShape& unextended_input_shape, + const T* input_data, const RuntimeShape& coords_shape, + const int32* coords_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int input_rank = op_params.input_rank; const int gather_dimensions = output_shape.DimensionsCount(); - TFLITE_DCHECK_LE(input_shape.DimensionsCount(), gather_dimensions); + TFLITE_DCHECK_GE(input_shape.DimensionsCount(), gather_dimensions); const int axis = gather_dimensions - input_rank; TFLITE_DCHECK_LT(axis, gather_dimensions); TFLITE_DCHECK_GE(axis, 0); @@ -4762,22 +4791,44 @@ inline void BroadcastComparison(int left_shift, const T* input1_data, input2_data, output_shape, output_data); \ } \ template <typename T> \ + inline void name##NoScaling( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const T* input1_data, const RuntimeShape& input2_shape, \ + const T* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + gemmlowp::ScopedProfilingLabel label(#name "NoScaling"); \ + ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data, \ + input2_shape, input2_data, output_shape, \ + output_data); \ + } \ + template <typename T> \ inline void name##WithScaling( \ const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ const T* input1_data, const RuntimeShape& input2_shape, \ const T* input2_data, const RuntimeShape& output_shape, \ bool* output_data) { \ - gemmlowp::ScopedProfilingLabel label(#name "/8bit"); \ + gemmlowp::ScopedProfilingLabel label(#name "WithScaling/8bit"); \ ComparisonWithScaling<T, 
name##Fn>(op_params, input1_shape, input1_data, \ input2_shape, input2_data, \ output_shape, output_data); \ } \ + template <typename T> \ + inline void Broadcast4DSlow##name##NoScaling( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const T* input1_data, const RuntimeShape& input2_shape, \ + const T* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name "NoScaling"); \ + BroadcastComparison4DSlowImpl<T, name##Fn>( \ + op_params, input1_shape, input1_data, input2_shape, input2_data, \ + output_shape, output_data); \ + } \ inline void Broadcast4DSlow##name( \ const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ const float* input1_data, const RuntimeShape& input2_shape, \ const float* input2_data, const RuntimeShape& output_shape, \ bool* output_data) { \ - gemmlowp::ScopedProfilingLabel label("Broadcast" #name); \ + gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name); \ BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data, \ input2_shape, input2_data, \ output_shape, output_data); \ @@ -4788,7 +4839,7 @@ inline void BroadcastComparison(int left_shift, const T* input1_data, const T* input1_data, const RuntimeShape& input2_shape, \ const T* input2_data, const RuntimeShape& output_shape, \ bool* output_data) { \ - gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit"); \ + gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name "/8bit"); \ BroadcastComparison4DSlowWithScaling<T, name##Fn>( \ op_params, input1_shape, input1_data, input2_shape, input2_data, \ output_shape, output_data); \ diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h index 13106456df..f1b08383b0 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor.h @@ -58,11 +58,14 @@ class VectorOfTensors { 
all_data_.reserve(num_tensors); all_dims_.reserve(num_tensors); all_dims_ptr_.reserve(num_tensors); + all_shape_.reserve(num_tensors); + all_shape_ptr_.reserve(num_tensors); for (int i = 0; i < num_tensors; ++i) { TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; all_data_.push_back(GetTensorData<T>(t)); all_dims_.push_back(GetTensorDims(t)); + all_shape_.push_back(GetTensorShape(t)); } // Taking the pointer from inside a std::vector is only OK if the vector is @@ -70,6 +73,7 @@ class VectorOfTensors { // are free to grab iterators here. for (int i = 0; i < num_tensors; ++i) { all_dims_ptr_.push_back(&all_dims_[i]); + all_shape_ptr_.push_back(&all_shape_[i]); } } // Return a pointer to the data pointers of all tensors in the list. For @@ -84,10 +88,18 @@ class VectorOfTensors { // dims[1] are the dimensions of the second tensor in the list. const Dims<4>* const* dims() const { return all_dims_ptr_.data(); } + // Return a pointer to the shape pointers of all tensors in the list. For + // example: + // const RuntimeShape* const* s = v.shapes(); + // s[1] is the shape of the second tensor in the list. 
+ const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); } + private: std::vector<T*> all_data_; std::vector<Dims<4>> all_dims_; std::vector<Dims<4>*> all_dims_ptr_; + std::vector<RuntimeShape> all_shape_; + std::vector<RuntimeShape*> all_shape_ptr_; }; // A list of quantized tensors in a format that can be used by kernels like diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index a3a5994c9c..b39347758a 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -875,6 +875,15 @@ struct MeanParams { int16 axis[4]; }; +struct PackParams { + int8 axis; + const int32* input_zeropoint; + const float* input_scale; + uint16 inputs_count; + int32 output_zeropoint; + float output_scale; +}; + struct PadParams { int8 left_padding_count; int32 left_padding[4]; @@ -975,6 +984,11 @@ struct TransposeParams { int32 perm[4]; }; +struct UnpackParams { + uint16 num_split; + int16 axis; +}; + template <typename P> inline void SetActivationParams(float min, float max, P* params) { params->float_activation_min = min; diff --git a/tensorflow/contrib/lite/kernels/pack.cc b/tensorflow/contrib/lite/kernels/pack.cc index 4cb98fdd19..c368582ef7 100644 --- a/tensorflow/contrib/lite/kernels/pack.cc +++ b/tensorflow/contrib/lite/kernels/pack.cc @@ -85,9 +85,12 @@ template <typename T> void PackImpl(TfLiteContext* context, TfLiteNode* node, TfLiteTensor* output, int values_count, int axis) { VectorOfTensors<T> all_inputs(*context, *node->inputs); - reference_ops::Pack<T>(RemapDim(NumDimensions(output), axis), - all_inputs.data(), all_inputs.dims(), values_count, - GetTensorData<T>(output), GetTensorDims(output)); + tflite::PackParams op_params; + op_params.axis = axis; + op_params.inputs_count = values_count; + + reference_ops::Pack<T>(op_params, all_inputs.shapes(), all_inputs.data(), + GetTensorShape(output), GetTensorData<T>(output)); } TfLiteStatus 
Eval(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/contrib/lite/kernels/unpack.cc b/tensorflow/contrib/lite/kernels/unpack.cc index 9ff06f8331..a7d3a9bc76 100644 --- a/tensorflow/contrib/lite/kernels/unpack.cc +++ b/tensorflow/contrib/lite/kernels/unpack.cc @@ -88,10 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { template <typename T> void UnpackImpl(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input, int output_count, int axis) { + tflite::UnpackParams op_params; + op_params.axis = axis; + op_params.num_split = output_count; VectorOfTensors<T> all_outputs(*context, *node->outputs); - reference_ops::Unpack<T>(axis, GetTensorData<T>(input), GetTensorDims(input), - NumDimensions(input), output_count, - all_outputs.data(), **all_outputs.dims()); + reference_ops::Unpack<T>(op_params, GetTensorShape(input), + GetTensorData<T>(input), **all_outputs.shapes(), + all_outputs.data()); } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { |