author     A. Unique TensorFlower <gardener@tensorflow.org>   2018-09-26 13:42:36 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>    2018-09-26 13:47:11 -0700
commit     652ce1aaefdadd04a9905a0788ab26c6fff93658 (patch)
tree       58a766a41069b3985de75ab092d3672d4d737a4d /tensorflow/contrib/lite/kernels
parent     d600b1b55fa851648918fed7a67f61eefd554034 (diff)
Kernel signature reworking, misc kernel improvements and migrations.
PiperOrigin-RevId: 214661332
Diffstat (limited to 'tensorflow/contrib/lite/kernels')
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h    1
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h  149
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/tensor.h                     12
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/types.h                      14
-rw-r--r--  tensorflow/contrib/lite/kernels/pack.cc                                9
-rw-r--r--  tensorflow/contrib/lite/kernels/unpack.cc                              9
6 files changed, 139 insertions, 55 deletions
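
The reworking replaces long positional kernel signatures (axis, counts, and quantization arguments passed individually, shapes as Dims<4>) with per-op parameter structs plus RuntimeShape arguments. A minimal sketch of the new calling convention for the float Pack path, assuming only the PackParams fields and reference_ops::Pack signature introduced in this change; the tensor contents and shapes are illustrative:

// Sketch only: packs two 2x3 float tensors along axis 0 with the
// params-struct signature. Values are illustrative, not from any test.
#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"

void PackTwoTensorsSketch() {
  const float input0[6] = {0, 1, 2, 3, 4, 5};
  const float input1[6] = {6, 7, 8, 9, 10, 11};
  const float* inputs[2] = {input0, input1};

  const tflite::RuntimeShape input_shape({2, 3});
  const tflite::RuntimeShape* input_shapes[2] = {&input_shape, &input_shape};
  const tflite::RuntimeShape output_shape({2, 2, 3});
  float output[12];

  tflite::PackParams op_params;
  op_params.axis = 0;          // pack along a new leading dimension
  op_params.inputs_count = 2;

  tflite::reference_ops::Pack<float>(op_params, input_shapes, inputs,
                                     output_shape, output);
  // output now holds input0 followed by input1, i.e. shape [2, 2, 3].
}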
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 0999738396..732880d9da 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -59,6 +59,7 @@ using reference_ops::BroadcastLessEqual;
using reference_ops::BroadcastMul4DSlow;
using reference_ops::BroadcastSub4DSlow;
using reference_ops::Concatenation;
+using reference_ops::ConcatenationWithScaling;
using reference_ops::DepthConcatenation;
using reference_ops::Dequantize;
using reference_ops::Div;
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 7a5535489a..cd9e1b255d 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1800,7 +1800,6 @@ inline void Concatenation(int concat_dim, const Scalar* const* input_data,
// quantized as it takes scale as a floating point value. This should be fixed
// when optimizing this routine further.
-// template <>
inline void ConcatenationWithScaling(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const uint8* const* input_data,
@@ -1813,15 +1812,13 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
const int32 output_zeropoint = params.output_zeropoint;
const float output_scale = params.output_scale;
- // The arguments input_zeropoint and input_scale are expected to be an array
- // that have the quantization parameters for all the inputs to the concat
- // operator.
- TFLITE_DCHECK_GT(inputs_count, 1);
- TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+ const int concat_dimensions = output_shape.DimensionsCount();
+ TFLITE_DCHECK_LT(axis, concat_dimensions);
+
int64_t concat_size = 0;
for (int i = 0; i < inputs_count; i++) {
- TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), 4);
- for (int j = 0; j < 4; j++) {
+ TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
+ for (int j = 0; j < concat_dimensions; j++) {
if (j != axis) {
MatchingDim(*input_shapes[i], j, output_shape, j);
}
@@ -1836,9 +1833,10 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
// For all input arrays,
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
int64_t base_inner_size = 1;
- for (int i = axis + 1; i < 4; ++i) {
+ for (int i = axis + 1; i < concat_dimensions; ++i) {
base_inner_size *= output_shape.Dims(i);
}
+
const float inverse_output_scale = 1.f / output_scale;
uint8* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
@@ -1892,37 +1890,51 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data,
}
template <typename Scalar>
-void Pack(int dim, const Scalar* const* input_data,
- const Dims<4>* const* input_dims, int inputs_count,
- Scalar* output_data, const Dims<4>& output_dims) {
- TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+void Pack(const PackParams& params, const RuntimeShape* const* input_shapes,
+ const Scalar* const* input_data, const RuntimeShape& output_shape,
+ Scalar* output_data) {
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
int outer_size = 1;
- for (int i = dim + 1; i < 4; i++) {
- outer_size *= output_dims.sizes[i];
+ for (int i = 0; i < axis; i++) {
+ outer_size *= output_shape.Dims(i);
}
- Scalar* output_ptr = output_data;
- const int copy_size = FlatSize(**input_dims) / outer_size;
- for (int k = 0; k < outer_size; k++) {
- for (int i = 0; i < inputs_count; ++i) {
- memcpy(output_ptr, input_data[i] + k * copy_size,
- copy_size * sizeof(Scalar));
- output_ptr += copy_size;
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++) {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i) {
+ for (int k = 0; k < outer_size; k++) {
+ const Scalar* input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
}
}
}
template <typename Scalar>
-void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims,
- int dimensions, int outputs_count, Scalar* const* output_datas,
- const Dims<4>& output_dims) {
+void Unpack(const UnpackParams& params, const RuntimeShape& input_shape,
+ const Scalar* input_data, const RuntimeShape& output_shape,
+ Scalar* const* output_datas) {
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
int outer_size = 1;
- for (int i = dimensions - axis; i < 4; i++) {
- outer_size *= input_dims.sizes[i];
+ for (int i = 0; i < params.axis; i++) {
+ outer_size *= input_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++) {
+ copy_size *= input_shape.Dims(i);
}
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
- const int copy_size = FlatSize(input_dims) / outer_size / outputs_count;
- for (int k = 0; k < outer_size; k++) {
- for (int i = 0; i < outputs_count; ++i) {
+ for (int i = 0; i < outputs_count; ++i) {
+ for (int k = 0; k < outer_size; k++) {
Scalar* output_ptr = output_datas[i] + copy_size * k;
int loc = k * outputs_count * copy_size + i * copy_size;
memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
@@ -1931,18 +1943,29 @@ void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims,
}
template <typename Scalar>
-void Pack(int dim, const Scalar* const* input_data,
- const Dims<4>* const* input_dims, const int32* input_zeropoint,
- const float* input_scale, int inputs_count, Scalar* output_data,
- const Dims<4>& output_dims, const int32 output_zeropoint,
- const float output_scale) {
- TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+void PackWithScaling(const PackParams& params,
+ const RuntimeShape* const* input_shapes,
+ const uint8* const* input_data,
+ const RuntimeShape& output_shape, uint8* output_data) {
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32* input_zeropoint = params.input_zeropoint;
+ const float* input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
int outer_size = 1;
- for (int i = dim + 1; i < 4; i++) {
- outer_size *= output_dims.sizes[i];
+ for (int i = 0; i < axis; i++) {
+ outer_size *= output_shape.Dims(i);
}
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++) {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
Scalar* output_ptr = output_data;
- const int copy_size = FlatSize(**input_dims) / outer_size;
const float inverse_output_scale = 1.f / output_scale;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
@@ -3374,15 +3397,21 @@ inline void Floor(const RuntimeShape& input_shape, const float* input_data,
template <typename T>
inline void Gather(const tflite::GatherParams& op_params,
- const RuntimeShape& input_shape, const T* input_data,
- const RuntimeShape& coords_shape, const int32* coords_data,
- const RuntimeShape& output_shape, T* output_data) {
- // Enable these checks when moving legacy ops to legacy_reference_ops.
- //
- // TFLITE_DCHECK_EQ(coords_shape.DimensionsCount(), 1);
+ const RuntimeShape& unextended_input_shape,
+ const T* input_data, const RuntimeShape& coords_shape,
+ const int32* coords_data,
+ const RuntimeShape& unextended_output_shape,
+ T* output_data) {
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape =
+ RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape output_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
const int input_rank = op_params.input_rank;
const int gather_dimensions = output_shape.DimensionsCount();
- TFLITE_DCHECK_LE(input_shape.DimensionsCount(), gather_dimensions);
+ TFLITE_DCHECK_GE(input_shape.DimensionsCount(), gather_dimensions);
const int axis = gather_dimensions - input_rank;
TFLITE_DCHECK_LT(axis, gather_dimensions);
TFLITE_DCHECK_GE(axis, 0);
@@ -4762,22 +4791,44 @@ inline void BroadcastComparison(int left_shift, const T* input1_data,
input2_data, output_shape, output_data); \
} \
template <typename T> \
+ inline void name##NoScaling( \
+ const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
+ const T* input1_data, const RuntimeShape& input2_shape, \
+ const T* input2_data, const RuntimeShape& output_shape, \
+ bool* output_data) { \
+ gemmlowp::ScopedProfilingLabel label(#name "NoScaling"); \
+ ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data, \
+ input2_shape, input2_data, output_shape, \
+ output_data); \
+ } \
+ template <typename T> \
inline void name##WithScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
- gemmlowp::ScopedProfilingLabel label(#name "/8bit"); \
+ gemmlowp::ScopedProfilingLabel label(#name "WithScaling/8bit"); \
ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, \
output_shape, output_data); \
} \
+ template <typename T> \
+ inline void Broadcast4DSlow##name##NoScaling( \
+ const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
+ const T* input1_data, const RuntimeShape& input2_shape, \
+ const T* input2_data, const RuntimeShape& output_shape, \
+ bool* output_data) { \
+ gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name "NoScaling"); \
+ BroadcastComparison4DSlowImpl<T, name##Fn>( \
+ op_params, input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ } \
inline void Broadcast4DSlow##name( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const float* input1_data, const RuntimeShape& input2_shape, \
const float* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
- gemmlowp::ScopedProfilingLabel label("Broadcast" #name); \
+ gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name); \
BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, \
output_shape, output_data); \
@@ -4788,7 +4839,7 @@ inline void BroadcastComparison(int left_shift, const T* input1_data,
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
- gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit"); \
+ gemmlowp::ScopedProfilingLabel label("Broadcast4DSlow" #name "/8bit"); \
BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
op_params, input1_shape, input1_data, input2_shape, input2_data, \
output_shape, output_data); \
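
Unpack mirrors the same decomposition: everything before params.axis forms outer_size, everything after it forms copy_size, and each output receives its interleaved slices. A minimal usage sketch under the same assumptions as above (the UnpackParams fields and the Unpack signature from this hunk); shapes and values are illustrative:

// Sketch only: unpacks a 2x3 float tensor along axis 0 into two length-3
// outputs, the inverse of the Pack sketch above.
#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"

void UnpackSketch() {
  const float input[6] = {0, 1, 2, 3, 4, 5};
  const tflite::RuntimeShape input_shape({2, 3});

  float out0[3];
  float out1[3];
  float* const outputs[2] = {out0, out1};
  const tflite::RuntimeShape output_shape({3});  // shape shared by all outputs

  tflite::UnpackParams op_params;
  op_params.axis = 0;       // split along the leading dimension
  op_params.num_split = 2;  // number of output tensors

  tflite::reference_ops::Unpack<float>(op_params, input_shape, input,
                                       output_shape, outputs);
  // out0 == {0, 1, 2}, out1 == {3, 4, 5}
}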
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h
index 13106456df..f1b08383b0 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor.h
@@ -58,11 +58,14 @@ class VectorOfTensors {
all_data_.reserve(num_tensors);
all_dims_.reserve(num_tensors);
all_dims_ptr_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
for (int i = 0; i < num_tensors; ++i) {
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
all_data_.push_back(GetTensorData<T>(t));
all_dims_.push_back(GetTensorDims(t));
+ all_shape_.push_back(GetTensorShape(t));
}
// Taking the pointer from inside a std::vector is only OK if the vector is
@@ -70,6 +73,7 @@ class VectorOfTensors {
// are free to grab iterators here.
for (int i = 0; i < num_tensors; ++i) {
all_dims_ptr_.push_back(&all_dims_[i]);
+ all_shape_ptr_.push_back(&all_shape_[i]);
}
}
// Return a pointer to the data pointers of all tensors in the list. For
@@ -84,10 +88,18 @@ class VectorOfTensors {
// dims[1] are the dimensions of the second tensor in the list.
const Dims<4>* const* dims() const { return all_dims_ptr_.data(); }
+ // Return a pointer to the shape pointers of all tensors in the list. For
+ // example:
+ //   const RuntimeShape* const* s = v.shapes();
+ //   s[1] is the shape of the second tensor in the list.
+ const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
+
private:
std::vector<T*> all_data_;
std::vector<Dims<4>> all_dims_;
std::vector<Dims<4>*> all_dims_ptr_;
+ std::vector<RuntimeShape> all_shape_;
+ std::vector<RuntimeShape*> all_shape_ptr_;
};
// A list of quantized tensors in a format that can be used by kernels like
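
For kernels that gather a variable number of inputs or outputs, VectorOfTensors now exposes the RuntimeShape view alongside the legacy Dims<4> view, so call sites can migrate without changing how the tensor list is collected. A sketch of what that looks like inside a kernel source file; it assumes the usual kernel headers (tensor.h and the TfLite context types) are already included, and the helper name is hypothetical:

// Hypothetical helper: collects data pointers and both shape views for a
// variadic kernel. `context` and `node` are the usual arguments of a
// kernel's Eval function.
template <typename T>
void CollectInputsSketch(TfLiteContext* context, TfLiteNode* node) {
  tflite::VectorOfTensors<T> all_inputs(*context, *node->inputs);

  T* const* data = all_inputs.data();                               // per-tensor data
  const tflite::RuntimeShape* const* shapes = all_inputs.shapes();  // new view
  const tflite::Dims<4>* const* legacy_dims = all_inputs.dims();    // legacy view

  // Both views describe the same tensors: shapes[i]->FlatSize() elements are
  // reachable through data[i]. Pass `shapes` and `data` to the
  // RuntimeShape-based reference ops, as PackImpl does in pack.cc below.
  (void)data;
  (void)shapes;
  (void)legacy_dims;
}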
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index a3a5994c9c..b39347758a 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -875,6 +875,15 @@ struct MeanParams {
int16 axis[4];
};
+struct PackParams {
+ int8 axis;
+ const int32* input_zeropoint;
+ const float* input_scale;
+ uint16 inputs_count;
+ int32 output_zeropoint;
+ float output_scale;
+};
+
struct PadParams {
int8 left_padding_count;
int32 left_padding[4];
@@ -975,6 +984,11 @@ struct TransposeParams {
int32 perm[4];
};
+struct UnpackParams {
+ uint16 num_split;
+ int16 axis;
+};
+
template <typename P>
inline void SetActivationParams(float min, float max, P* params) {
params->float_activation_min = min;
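
The quantized pack path routes its per-input zero points and scales through the same struct rather than as separate arguments. A sketch of populating PackParams for PackWithScaling, assuming only the struct fields above and the PackWithScaling signature from reference_ops.h; all numeric quantization values are illustrative:

// Sketch only: quantized pack of two length-4 uint8 tensors along axis 0.
#include <cstdint>

#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"

void QuantizedPackSketch() {
  const uint8_t input0[4] = {10, 20, 30, 40};
  const uint8_t input1[4] = {50, 60, 70, 80};
  const uint8_t* inputs[2] = {input0, input1};

  const tflite::RuntimeShape input_shape({4});
  const tflite::RuntimeShape* input_shapes[2] = {&input_shape, &input_shape};
  const tflite::RuntimeShape output_shape({2, 4});
  uint8_t output[8];

  // Per-input quantization parameters, one entry per packed input.
  const int32_t input_zeropoint[2] = {128, 128};
  const float input_scale[2] = {0.5f, 0.25f};

  tflite::PackParams op_params;
  op_params.axis = 0;
  op_params.inputs_count = 2;
  op_params.input_zeropoint = input_zeropoint;
  op_params.input_scale = input_scale;
  op_params.output_zeropoint = 128;
  op_params.output_scale = 0.5f;

  tflite::reference_ops::PackWithScaling<uint8_t>(
      op_params, input_shapes, inputs, output_shape, output);
}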
diff --git a/tensorflow/contrib/lite/kernels/pack.cc b/tensorflow/contrib/lite/kernels/pack.cc
index 4cb98fdd19..c368582ef7 100644
--- a/tensorflow/contrib/lite/kernels/pack.cc
+++ b/tensorflow/contrib/lite/kernels/pack.cc
@@ -85,9 +85,12 @@ template <typename T>
void PackImpl(TfLiteContext* context, TfLiteNode* node, TfLiteTensor* output,
int values_count, int axis) {
VectorOfTensors<T> all_inputs(*context, *node->inputs);
- reference_ops::Pack<T>(RemapDim(NumDimensions(output), axis),
- all_inputs.data(), all_inputs.dims(), values_count,
- GetTensorData<T>(output), GetTensorDims(output));
+ tflite::PackParams op_params;
+ op_params.axis = axis;
+ op_params.inputs_count = values_count;
+
+ reference_ops::Pack<T>(op_params, all_inputs.shapes(), all_inputs.data(),
+ GetTensorShape(output), GetTensorData<T>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
diff --git a/tensorflow/contrib/lite/kernels/unpack.cc b/tensorflow/contrib/lite/kernels/unpack.cc
index 9ff06f8331..a7d3a9bc76 100644
--- a/tensorflow/contrib/lite/kernels/unpack.cc
+++ b/tensorflow/contrib/lite/kernels/unpack.cc
@@ -88,10 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
template <typename T>
void UnpackImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input, int output_count, int axis) {
+ tflite::UnpackParams op_params;
+ op_params.axis = axis;
+ op_params.num_split = output_count;
VectorOfTensors<T> all_outputs(*context, *node->outputs);
- reference_ops::Unpack<T>(axis, GetTensorData<T>(input), GetTensorDims(input),
- NumDimensions(input), output_count,
- all_outputs.data(), **all_outputs.dims());
+ reference_ops::Unpack<T>(op_params, GetTensorShape(input),
+ GetTensorData<T>(input), **all_outputs.shapes(),
+ all_outputs.data());
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {