diff options
author | 2018-08-30 11:17:57 -0700 | |
---|---|---|
committer | 2018-08-30 11:23:53 -0700 | |
commit | 9e12f1df3270b5e0b310645e6c3cae9fbd3f5dfc (patch) | |
tree | 6fb67b08ce4747aaf27f40d71a42edab04ea176c /tensorflow/contrib/lite/kernels/internal | |
parent | 35bae087dce1e88c66007907f9e1b6b5b2958f10 (diff) |
Consolidate refactoring of runtime shapes.
PiperOrigin-RevId: 210945714
Diffstat (limited to 'tensorflow/contrib/lite/kernels/internal')
4 files changed, 116 insertions, 75 deletions
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h index df4d871466..332e7f803b 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -296,13 +296,17 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, int output_shift, int32 output_activation_min, int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims) { - BroadcastMul4DSlow( - input1_data, input1_dims, input1_offset, input2_data, input2_dims, - input2_offset, output_offset, output_multiplier, - // This legacy version switches the sign of the output shift. - kReverseShift * output_shift, - // (Break to highlight preceding line.) - output_activation_min, output_activation_max, output_data, output_dims); + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); + op_params.input1_offset = input1_offset; + op_params.input2_offset = input2_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = kReverseShift * output_shift; + + BroadcastMul4DSlow(op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index e4bb4e0534..c7ee65d63a 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -5586,18 +5586,15 @@ inline void ResizeBilinearGenericSmallChannel( inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params, const RuntimeShape& unextended_input_shape, const float* input_data, - const RuntimeShape& unextended_output_size_shape, + const RuntimeShape& output_size_shape, const int32* output_size_data, const RuntimeShape& unextended_output_shape, float* output_data) { gemmlowp::ScopedProfilingLabel label("ResizeBilinear"); TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4); TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); - RuntimeShape output_size_shape = - RuntimeShape::ExtendedShape(4, unextended_output_size_shape); RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); @@ -5606,12 +5603,9 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params, int32 input_width = input_shape.Dims(2); int32 depth = MatchingDim(input_shape, 3, output_shape, 3); - TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2); - int32 output_height = output_size_data[Offset(output_size_shape, 0, 0, 0, 0)]; - int32 output_width = output_size_data[Offset(output_size_shape, 0, 0, 0, 1)]; + TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2); + int32 output_height = output_size_data[0]; + int32 output_width = output_size_data[1]; // Specialize for 2x2 upsample. if (!op_params.align_corners && output_height == 2 * input_height && @@ -5651,28 +5645,28 @@ inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, // TODO(prabhumk): This is not a real quantized bilinear. It does not use int8 // or int16 arithmetic. inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params, - const RuntimeShape& input_shape, + const RuntimeShape& unextended_input_shape, const uint8* input_data, const RuntimeShape& output_size_shape, const int32* output_size_data, - const RuntimeShape& output_shape, + const RuntimeShape& unextended_output_shape, uint8* output_data) { gemmlowp::ScopedProfilingLabel label("ResizeBilinear"); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_size_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); int32 batches = MatchingDim(input_shape, 0, output_shape, 0); int32 input_height = input_shape.Dims(1); int32 input_width = input_shape.Dims(2); int32 depth = MatchingDim(input_shape, 3, output_shape, 3); - TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2); - int32 output_height = output_size_data[Offset(output_size_shape, 0, 0, 0, 0)]; - int32 output_width = output_size_data[Offset(output_size_shape, 0, 0, 0, 1)]; + TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2); + int32 output_height = output_size_data[0]; + int32 output_width = output_size_data[1]; float height_scale = (op_params.align_corners && output_height > 1) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 3875b73e05..5f84c737eb 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -4421,16 +4421,22 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, } template <typename T, typename Op> -void MaximumMinimumBroadcast4DSlow(const RuntimeShape& input1_shape, +void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, + const RuntimeShape& unextended_input2_shape, const T* input2_data, - const RuntimeShape& output_shape, + const RuntimeShape& unextended_output_shape, T* output_data, Op op) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); for (int b = 0; b < output_shape.Dims(0); ++b) { for (int y = 0; y < output_shape.Dims(1); ++y) { @@ -4459,8 +4465,8 @@ void TensorFlowMaximumMinimum(const T* input1_data, const Dims<4>& input1_dims, } template <typename T1, typename T2, typename T3, typename Cmp> -void ArgMinMax(const T3* axis, const RuntimeShape& input_shape, - const T1* input_data, const RuntimeShape& output_shape, +void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data, + const T3* input2_data, const RuntimeShape& output_shape, T2* output_data, const Cmp& cmp) { // The current ArgMax implemention can only determine the index of the maximum // value in the last dimension. So the axis argument is ignored. @@ -4469,17 +4475,19 @@ void ArgMinMax(const T3* axis, const RuntimeShape& input_shape, // 1). For the sake of simplicity, the output dimensions are equal to the // input dimensions here. We enforce the constraint that the last dimension // must always be 1. - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.Dims(3), 1); - const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, output_shape); - const int depth = input_shape.Dims(3); + const int trailing_dim = output_shape.DimensionsCount() - 1; + TFLITE_DCHECK_EQ(input1_shape.DimensionsCount(), + output_shape.DimensionsCount()); + TFLITE_DCHECK_EQ(output_shape.Dims(trailing_dim), 1); + const int outer_size = + MatchingFlatSizeSkipDim(input1_shape, trailing_dim, output_shape); + const int depth = input1_shape.Dims(trailing_dim); for (int i = 0; i < outer_size; ++i) { - auto min_max_value = input_data[i * depth]; + auto min_max_value = input1_data[i * depth]; int min_max_index = 0; for (int d = 1; d < depth; ++d) { - const auto& curr_value = input_data[i * depth + d]; + const auto& curr_value = input1_data[i * depth + d]; if (cmp(curr_value, min_max_value)) { min_max_value = curr_value; min_max_index = d; @@ -4493,12 +4501,19 @@ void ArgMinMax(const T3* axis, const RuntimeShape& input_shape, template <typename T1, typename T2, typename T3, typename Cmp> void ArgMinMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, T2* output_data, const Dims<4>& output_dims, const Cmp& cmp) { - ArgMinMax(axis, DimsToShape(input_dims), input_data, DimsToShape(output_dims), + ArgMinMax(DimsToShape(input_dims), input_data, axis, DimsToShape(output_dims), output_data, cmp); } +template <typename T1, typename T2, typename T3> +void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data, + const T3* input2_data, const RuntimeShape& output_shape, + T2* output_data) { + ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, + std::greater<T1>()); +} + // Legacy. -// TODO(renjieliu): Remove this one. template <typename T1, typename T2, typename T3> void ArgMax(const T3* axis, const T1* input_data, const tflite::Dims<4>& input_dims, T2* output_data, @@ -4938,14 +4953,20 @@ inline void Logical(const bool* input1_data, const Dims<4>& input1_dims, } inline void BroadcastLogical4DSlow( - const RuntimeShape& input1_shape, const bool* input1_data, - const RuntimeShape& input2_shape, const bool* input2_data, - const RuntimeShape& output_shape, bool* output_data, + const RuntimeShape& unextended_input1_shape, const bool* input1_data, + const RuntimeShape& unextended_input2_shape, const bool* input2_data, + const RuntimeShape& unextended_output_shape, bool* output_data, const std::function<bool(bool, bool)>& func) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); for (int b = 0; b < output_shape.Dims(0); ++b) { for (int y = 0; y < output_shape.Dims(1); ++y) { @@ -4982,16 +5003,21 @@ inline void BroadcastLogical(const bool* input1_data, // // R: Result type. T1: Input 1 type. T2: Input 2 type. template <typename R, typename T1, typename T2> -inline void BroadcastBinaryFunction4DSlow(const RuntimeShape& input1_shape, - const T1* input1_data, - const RuntimeShape& input2_shape, - const T2* input2_data, - const RuntimeShape& output_shape, - R* output_data, R (*func)(T1, T2)) { +inline void BroadcastBinaryFunction4DSlow( + const RuntimeShape& unextended_input1_shape, const T1* input1_data, + const RuntimeShape& unextended_input2_shape, const T2* input2_data, + const RuntimeShape& unextended_output_shape, R* output_data, + R (*func)(T1, T2)) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); for (int b = 0; b < output_shape.Dims(0); ++b) { for (int y = 0; y < output_shape.Dims(1); ++y) { @@ -5024,6 +5050,22 @@ inline void BroadcastBinaryFunction(const T1* input1_data, DimsToShape(output_dims), output_data, func); } +// R: Result type. T1: Input 1 type. T2: Input 2 type. +// TODO(renjieliu): Refactor other binary functions to use this one. +template <typename R, typename T1, typename T2> +inline void BinaryFunction(const RuntimeShape& input1_shape, + const T1* input1_data, + const RuntimeShape& input2_shape, + const T2* input2_data, + const RuntimeShape& output_shape, R* output_data, + R (*func)(T1, T2)) { + const int flat_size = + MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = func(input1_data[i], input2_data[i]); + } +} + // Legacy Dims<4> version. // // R: Result type. T1: Input 1 type. T2: Input 2 type. @@ -5033,10 +5075,9 @@ inline void BinaryFunction(const T1* input1_data, const Dims<4>& input1_dims, const T2* input2_data, const Dims<4>& input2_dims, R* output_data, const Dims<4>& output_dims, R (*func)(T1, T2)) { - const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = func(input1_data[i], input2_data[i]); - } + BinaryFunction(DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data, func); } } // namespace reference_ops diff --git a/tensorflow/contrib/lite/kernels/internal/resize_bilinear_test.cc b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_test.cc index 3d8765f11b..15df31f75a 100644 --- a/tensorflow/contrib/lite/kernels/internal/resize_bilinear_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_test.cc @@ -28,14 +28,12 @@ template <typename T> void TestOneResizeBilinear(int batch, int depth, int input_width, int input_height, int output_width, int output_height, float error_threshold) { - Dims<4> input_dims_inference = - MakeDimsForInference(depth, input_width, input_height, batch); - Dims<4> output_dims_inference = - MakeDimsForInference(depth, output_width, output_height, batch); + RuntimeShape input_dims_inference({batch, input_height, input_width, depth}); + RuntimeShape output_dims_inference( + {batch, output_height, output_width, depth}); - const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); - const int output_buffer_size = - RequiredBufferSizeForDims(output_dims_inference); + const int input_buffer_size = input_dims_inference.FlatSize(); + const int output_buffer_size = output_dims_inference.FlatSize(); std::vector<T> input_data(input_buffer_size, 0); std::vector<T> reference_output_data(output_buffer_size, 0); @@ -47,15 +45,19 @@ void TestOneResizeBilinear(int batch, int depth, int input_width, const T max_amplitude = static_cast<T>(255); FillRandom(&input_data, min_amplitude, max_amplitude); - Dims<4> output_size_dims = MakeDimsForInference(2, 1, 1, 1); + RuntimeShape output_size_dims({1, 1, 1, 2}); std::vector<int32> output_size_data = {output_height, output_width}; - reference_ops::ResizeBilinear( - input_data.data(), input_dims_inference, output_size_data.data(), - output_size_dims, reference_output_data.data(), output_dims_inference); - optimized_ops::ResizeBilinear(input_data.data(), input_dims_inference, - output_size_data.data(), output_size_dims, - output_data.data(), output_dims_inference); + tflite::ResizeBilinearParams op_params; + op_params.align_corners = false; + + reference_ops::ResizeBilinear(op_params, input_dims_inference, + input_data.data(), output_size_dims, + output_size_data.data(), output_dims_inference, + reference_output_data.data()); + optimized_ops::ResizeBilinear( + op_params, input_dims_inference, input_data.data(), output_size_dims, + output_size_data.data(), output_dims_inference, output_data.data()); double sum_diff = 0; float max_abs_val = 0; |