author A. Unique TensorFlower <gardener@tensorflow.org> 2018-09-17 08:21:43 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-09-17 08:26:16 -0700
commit e0d6830999a6e7c92f047e6e89c3aba20911cc8c (patch)
tree 09ac1884b657de19a4dc4b2f796b8682ca803003
parent 055e5a0f71c83bab3f645d1c2e2cadeff5ff654f (diff)
Convert more kernel signatures to use runtime shapes.
PiperOrigin-RevId: 213275003
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h            109
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h            123
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h  66
-rw-r--r--  tensorflow/contrib/lite/kernels/internal/types.h                                       2
4 files changed, 206 insertions, 94 deletions
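A note on the index convention behind this change: the legacy Dims<4> type lists dimensions innermost-first (index 0 is depth), while RuntimeShape lists them in NHWC order, which is why ArraySize(input_dims, 2) becomes input_shape.Dims(1) throughout the patch. Below is a minimal standalone sketch of that axis reversal; Dims4, Shape4, and this DimsToShape are simplified stand-ins for illustration, not the real tflite types.

#include <array>
#include <cstdio>

// Simplified stand-ins, used only to illustrate the axis-order convention.
struct Dims4 {
  std::array<int, 4> sizes;  // sizes[0] is the innermost (depth) extent.
};

struct Shape4 {
  std::array<int, 4> dims;   // NHWC order: batch, height, width, depth.
  int Dims(int i) const { return dims[i]; }
};

// Mirrors what a DimsToShape-style helper has to do: reverse the axes.
Shape4 DimsToShape(const Dims4& d) {
  return Shape4{{d.sizes[3], d.sizes[2], d.sizes[1], d.sizes[0]}};
}

int main() {
  // Dims<4> layout: {depth, width, height, batch}.
  Dims4 input_dims{{8, 64, 32, 1}};
  Shape4 input_shape = DimsToShape(input_dims);
  // Prints "1 32 64 8": batch, height, width, depth.
  std::printf("%d %d %d %d\n", input_shape.Dims(0), input_shape.Dims(1),
              input_shape.Dims(2), input_shape.Dims(3));
  return 0;
}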
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 70810ca784..f2d1319801 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -907,25 +907,40 @@ inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
}
}
-inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
- const float* filter_data, const Dims<4>& filter_dims,
- const float* bias_data, const Dims<4>& bias_dims,
- int stride_width, int stride_height, int pad_width,
- int pad_height, int depth_multiplier,
- float output_activation_min,
- float output_activation_max, float* output_data,
- const Dims<4>& output_dims) {
+inline void DepthwiseConv(
+ const DepthwiseParams& params, const RuntimeShape& input_shape,
+ const float* input_data, const RuntimeShape& filter_shape,
+ const float* filter_data, const RuntimeShape& bias_shape,
+ const float* bias_data, const RuntimeShape& output_shape,
+ float* output_data) {
gemmlowp::ScopedProfilingLabel label("DepthwiseConv");
- const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
- const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
- const int input_height = ArraySize(input_dims, 2);
- const int input_width = ArraySize(input_dims, 1);
- const int input_depth = ArraySize(input_dims, 0);
- const int filter_height = ArraySize(filter_dims, 2);
- const int filter_width = ArraySize(filter_dims, 1);
- const int output_height = ArraySize(output_dims, 2);
- const int output_width = ArraySize(output_dims, 1);
- TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  // TODO(suharshs): Optimized implementation of dilation depthwise conv needs to
+  // be implemented.
+ TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
+ TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+ TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
static const int kAccBufferMaxSize = 2048;
float acc_buffer[kAccBufferMaxSize];
@@ -990,6 +1005,10 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
row_accum_func = FloatDepthwiseConvAccumRowGeneric;
}
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
// Now that we have determined row_accum_func, we can start work.
float* output_ptr = output_data;
for (int b = 0; b < batches; ++b) {
@@ -1014,13 +1033,12 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
const int in_y = in_y_origin + filter_y;
- row_accum_func(stride_width, input_depth, input_width,
- input_data + in_y * input_dims.strides[2] +
- b * input_dims.strides[3],
- pad_width, depth_multiplier, filter_width,
- filter_data + filter_y * filter_dims.strides[2],
- out_x_buffer_start, out_x_buffer_end, output_depth,
- acc_buffer);
+ row_accum_func(
+ stride_width, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
}
// Finished accumulating. Now store to destination.
const int num_output_values = output_depth * num_output_pixels;
@@ -1067,6 +1085,8 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
}
}
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
const float* filter_data, const Dims<4>& filter_dims,
const float* bias_data, const Dims<4>& bias_dims,
@@ -1078,15 +1098,43 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
const Dims<4>& output_dims) {
// TODO(suharshs): Optimized implementation of dilation depthwise conv need to
// be implemented.
- TFLITE_DCHECK(dilation_width_factor == 1);
- TFLITE_DCHECK(dilation_height_factor == 1);
+ TFLITE_DCHECK_EQ(dilation_width_factor, 1);
+ TFLITE_DCHECK_EQ(dilation_height_factor, 1);
+ tflite::DepthwiseParams op_params;
+ // Padding type is ignored, but still set.
+ op_params.padding_type = PaddingType::kSame;
+ op_params.padding_values.width = pad_width;
+ op_params.padding_values.height = pad_height;
+ op_params.stride_width = stride_width;
+ op_params.stride_height = stride_height;
+ op_params.dilation_width_factor = dilation_width_factor;
+ op_params.dilation_height_factor = dilation_height_factor;
+ op_params.depth_multiplier = depth_multiplier;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+ DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+ bias_data, DimsToShape(output_dims), output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+ const float* filter_data, const Dims<4>& filter_dims,
+ const float* bias_data, const Dims<4>& bias_dims,
+ int stride_width, int stride_height, int pad_width,
+ int pad_height, int depth_multiplier,
+ float output_activation_min,
+ float output_activation_max, float* output_data,
+ const Dims<4>& output_dims) {
DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
- bias_dims, stride_width, stride_height, pad_width, pad_height,
- depth_multiplier, output_activation_min, output_activation_max,
- output_data, output_dims);
+ bias_dims, stride_width, stride_height, 1, 1, pad_width,
+ pad_height, depth_multiplier, output_activation_min,
+ output_activation_max, output_data, output_dims);
}
+// TODO(b/80418076): Move to legacy ops file, update invocations.
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
@@ -1103,6 +1151,7 @@ void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
output_data, output_dims);
}
+// TODO(b/80418076): Move to legacy ops file, update invocations.
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
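The rewritten float kernel above no longer reads precomputed strides out of Dims<4>; for a dense NHWC tensor it derives them from the shape (input_height_stride = depth * width, input_batch_stride = input_height_stride * height). A small self-contained sketch of that arithmetic, with illustrative sizes only:

#include <cassert>
#include <cstdio>

int main() {
  // Dense NHWC tensor extents: height, width, depth (batch implicit).
  const int height = 5, width = 7, depth = 3;

  // Same derivation as in the patch: a row advances by width * depth
  // elements, a batch advances by height rows.
  const int input_height_stride = depth * width;
  const int input_batch_stride = input_height_stride * height;

  // Offset of element (b, in_y, 0, 0), matching
  // input_data + in_y * input_height_stride + b * input_batch_stride.
  const int b = 1, in_y = 4;
  const int offset = in_y * input_height_stride + b * input_batch_stride;

  // Cross-check against the flattened NHWC index ((b * H + y) * W + x) * D.
  assert(offset == ((b * height + in_y) * width + 0) * depth + 0);
  std::printf("offset = %d\n", offset);
  return 0;
}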
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index f707279600..ccb9d1654f 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1669,33 +1669,50 @@ inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
}
}
-inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
- int32 input_offset, const uint8* filter_data,
- const Dims<4>& filter_dims, int32 filter_offset,
- const int32* bias_data, const Dims<4>& bias_dims,
- int stride_width, int stride_height, int pad_width,
- int pad_height, int depth_multiplier,
- int32 output_offset, int32 output_multiplier,
- int output_shift, int32 output_activation_min,
- int32 output_activation_max, uint8* output_data,
- const Dims<4>& output_dims) {
+inline void DepthwiseConv(
+ const DepthwiseParams& params, const RuntimeShape& input_shape,
+ const uint8* input_data, const RuntimeShape& filter_shape,
+ const uint8* filter_data, const RuntimeShape& bias_shape,
+ const int32* bias_data, const RuntimeShape& output_shape,
+ uint8* output_data) {
gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit");
- TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32 output_activation_min = params.quantized_activation_min;
+ const int32 output_activation_max = params.quantized_activation_max;
+ const int32 input_offset = params.input_offset;
+ const int32 filter_offset = params.weights_offset;
+ const int32 output_offset = params.output_offset;
+ const int32 output_multiplier = params.output_multiplier;
+ const int output_shift = params.output_shift;
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  // TODO(suharshs): Optimized implementation of dilation depthwise conv needs to
+  // be implemented.
+ TFLITE_DCHECK_EQ(params.dilation_width_factor, 1);
+ TFLITE_DCHECK_EQ(params.dilation_height_factor, 1);
- const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
- const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
- const int input_height = ArraySize(input_dims, 2);
- const int input_width = ArraySize(input_dims, 1);
- const int input_depth = ArraySize(input_dims, 0);
- const int filter_height = ArraySize(filter_dims, 2);
- const int filter_width = ArraySize(filter_dims, 1);
- const int output_height = ArraySize(output_dims, 2);
- const int output_width = ArraySize(output_dims, 1);
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
#ifdef USE_NEON
const bool shift_left = (output_shift <= 0);
const int32 multiplier_power_of_two = shift_left ? (1 << -output_shift) : 1;
#endif
- TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
+ TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+ TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
// Jetson TX-2. This compiler does not support the offsetof() macro.
@@ -1703,14 +1720,11 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
// Call kernel optimized for depthwise convolutions using 3x3 filters if
// parameters are supported.
if (Fast3x3FilterKernelSupported(
- input_dims, filter_dims, stride_width, stride_height, pad_width,
- pad_height, depth_multiplier, output_dims, output_shift)) {
- DepthwiseConv3x3Filter(input_data, input_dims, input_offset, filter_data,
- filter_dims, filter_offset, bias_data, bias_dims,
- stride_width, stride_height, pad_width, pad_height,
- depth_multiplier, output_offset, output_multiplier,
- output_shift, output_activation_min,
- output_activation_max, output_data, output_dims);
+ input_shape, filter_shape, stride_width, stride_height, pad_width,
+ pad_height, depth_multiplier, output_shape, output_shift)) {
+ DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
return;
}
#endif
@@ -1785,6 +1799,10 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
// Now that we have determined row_accum_func, we can start work.
uint8* output_ptr = output_data;
for (int b = 0; b < batches; ++b) {
@@ -1811,10 +1829,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
const int in_y = in_y_origin + filter_y;
row_accum_func(
stride_width, input_depth, input_width,
- input_data + in_y * input_dims.strides[2] +
- b * input_dims.strides[3],
+ input_data + in_y * input_height_stride + b * input_batch_stride,
input_offset, pad_width, depth_multiplier, filter_width,
- filter_data + filter_y * filter_dims.strides[2], filter_offset,
+ filter_data + filter_y * filter_height_stride, filter_offset,
out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer);
}
// Finished accumulating int32 values. Now need to convert them to
@@ -1964,6 +1981,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
}
}
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
int32 input_offset, const uint8* filter_data,
const Dims<4>& filter_dims, int32 filter_offset,
@@ -1975,19 +1994,48 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
int output_shift, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
- // TODO(suharshs): Optimized implementation of dilation depthwise is not
- // supported yet.
- TFLITE_DCHECK(dilation_width_factor == 1);
- TFLITE_DCHECK(dilation_height_factor == 1);
+ tflite::DepthwiseParams op_params;
+ // Padding type is ignored, but still set.
+ op_params.padding_type = PaddingType::kSame;
+ op_params.padding_values.width = pad_width;
+ op_params.padding_values.height = pad_height;
+ op_params.stride_width = stride_width;
+ op_params.stride_height = stride_height;
+ op_params.dilation_width_factor = dilation_width_factor;
+ op_params.dilation_height_factor = dilation_height_factor;
+ op_params.depth_multiplier = depth_multiplier;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+
+ DepthwiseConv(op_params, DimsToShape(input_dims), input_data,
+ DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+ bias_data, DimsToShape(output_dims), output_data);
+}
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+ int32 input_offset, const uint8* filter_data,
+ const Dims<4>& filter_dims, int32 filter_offset,
+ const int32* bias_data, const Dims<4>& bias_dims,
+ int stride_width, int stride_height, int pad_width,
+ int pad_height, int depth_multiplier,
+ int32 output_offset, int32 output_multiplier,
+ int output_shift, int32 output_activation_min,
+ int32 output_activation_max, uint8* output_data,
+ const Dims<4>& output_dims) {
DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
filter_offset, bias_data, bias_dims, stride_width,
- stride_height, pad_width, pad_height, depth_multiplier,
+ stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
output_offset, output_multiplier, output_shift,
output_activation_min, output_activation_max, output_data,
output_dims);
}
+// TODO(b/80418076): Move to legacy ops file, update invocations.
// Legacy, for compatibility with old checked-in code.
template <FusedActivationFunctionType Ac>
void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
@@ -2011,6 +2059,7 @@ void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
output_dims);
}
+// TODO(b/80418076): Move to legacy ops file, update invocations.
// Legacy, for compatibility with old checked-in code.
template <FusedActivationFunctionType Ac>
void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
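The Dims<4> overloads kept above are now thin adapters: they pack their loose arguments into a params struct and forward to the runtime-shape signature. The sketch below shows the general shape of that wrapper pattern using hypothetical stand-in types (Params, Shape, NewStyleOp, LegacyOp), not the actual TFLite declarations.

#include <cstdio>

// Hypothetical stand-ins for the params struct and shape type.
struct Params {
  int stride_width = 1;
  int stride_height = 1;
  int pad_width = 0;
  int pad_height = 0;
  int depth_multiplier = 1;
};

struct Shape {
  int dims[4];  // NHWC.
};

// New-style signature: params struct + shapes + data pointers.
void NewStyleOp(const Params& params, const Shape& input_shape,
                const float* input_data, float* output_data) {
  std::printf("stride=%dx%d pad=%dx%d\n", params.stride_width,
              params.stride_height, params.pad_width, params.pad_height);
  (void)input_shape;
  (void)input_data;
  (void)output_data;
}

// Legacy-style signature kept for old call sites: packs its loose arguments
// into Params and forwards, which is the shape of the wrappers in the patch.
void LegacyOp(const float* input_data, const Shape& input_shape,
              int stride_width, int stride_height, int pad_width,
              int pad_height, int depth_multiplier, float* output_data) {
  Params op_params;
  op_params.stride_width = stride_width;
  op_params.stride_height = stride_height;
  op_params.pad_width = pad_width;
  op_params.pad_height = pad_height;
  op_params.depth_multiplier = depth_multiplier;
  NewStyleOp(op_params, input_shape, input_data, output_data);
}

int main() {
  Shape s{{1, 8, 8, 16}};
  float in[1] = {0.f}, out[1] = {0.f};
  LegacyOp(in, s, 2, 2, 1, 1, 1, out);
  return 0;
}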
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
index 0ce64f8c70..9fed53cafb 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h
@@ -3175,16 +3175,17 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
}
inline bool Fast3x3FilterKernelSupported(
- const Dims<4>& input_dims, const Dims<4>& filter_dims, int32 stride_width,
- int32 stride_height, int32 pad_width, int32 pad_height,
- int32 depth_multiplier, const Dims<4>& output_dims, int32 output_shift) {
- const int32 input_height = ArraySize(input_dims, 2);
- const int32 input_width = ArraySize(input_dims, 1);
- const int32 input_depth = ArraySize(input_dims, 0);
- const int32 filter_height = ArraySize(filter_dims, 2);
- const int32 filter_width = ArraySize(filter_dims, 1);
- const int32 output_height = ArraySize(output_dims, 2);
- const int32 output_width = ArraySize(output_dims, 1);
+ const RuntimeShape& input_shape, const RuntimeShape& filter_shape,
+ int32 stride_width, int32 stride_height, int32 pad_width, int32 pad_height,
+ int32 depth_multiplier, const RuntimeShape& output_shape,
+ int32 output_shift) {
+ const int32 input_height = input_shape.Dims(1);
+ const int32 input_width = input_shape.Dims(2);
+ const int32 input_depth = input_shape.Dims(3);
+ const int32 filter_height = filter_shape.Dims(1);
+ const int32 filter_width = filter_shape.Dims(2);
+ const int32 output_height = output_shape.Dims(1);
+ const int32 output_width = output_shape.Dims(2);
bool supported =
filter_width == 3 && filter_height == 3 && depth_multiplier == 1 &&
@@ -3234,26 +3235,37 @@ inline bool Fast3x3FilterKernelSupported(
}
inline void DepthwiseConv3x3Filter(
- const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
- const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
- const int32* bias_data, const Dims<4>& bias_dims, int32 stride_width,
- int32 stride_height, int32 pad_width, int32 pad_height,
- int32 depth_multiplier, int32 output_offset, int32 output_multiplier,
- int32 output_shift, int32 output_activation_min,
- int32 output_activation_max, uint8* output_data,
- const Dims<4>& output_dims) {
+ const DepthwiseParams& rt_params, const RuntimeShape& input_shape,
+ const uint8* input_data, const RuntimeShape& filter_shape,
+ const uint8* filter_data, const RuntimeShape& bias_shape,
+ const int32* bias_data, const RuntimeShape& output_shape,
+ uint8* output_data) {
gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__);
DepthwiseConvParams params;
- params.input_depth = ArraySize(input_dims, 0);
- params.input_width = ArraySize(input_dims, 1);
- params.input_height = ArraySize(input_dims, 2);
+
+ const int32 stride_width = rt_params.stride_width;
+ const int32 stride_height = rt_params.stride_height;
+ const int32 pad_width = rt_params.padding_values.width;
+ const int32 pad_height = rt_params.padding_values.height;
+ const int32 depth_multiplier = rt_params.depth_multiplier;
+ const int32 output_activation_min = rt_params.quantized_activation_min;
+ const int32 output_activation_max = rt_params.quantized_activation_max;
+ const int32 input_offset = rt_params.input_offset;
+ const int32 filter_offset = rt_params.weights_offset;
+ const int32 output_offset = rt_params.output_offset;
+ const int32 output_multiplier = rt_params.output_multiplier;
+ const int32 output_shift = rt_params.output_shift;
+
+ params.input_depth = input_shape.Dims(3);
+ params.input_width = input_shape.Dims(2);
+ params.input_height = input_shape.Dims(1);
params.input_row_size = params.input_depth * params.input_width;
params.input_offset = input_offset;
params.stride_width = stride_width;
params.stride_height = stride_height;
- params.output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
- params.output_width = ArraySize(output_dims, 1);
- params.output_height = ArraySize(output_dims, 2);
+ params.output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ params.output_width = output_shape.Dims(2);
+ params.output_height = output_shape.Dims(1);
params.output_row_size = params.output_depth * params.output_width;
params.output_offset = output_offset;
params.filter_offset = filter_offset;
@@ -3262,8 +3274,8 @@ inline void DepthwiseConv3x3Filter(
params.output_activation_min = output_activation_min;
params.output_activation_max = output_activation_max;
- const int32 filter_height = ArraySize(filter_dims, 2);
- const int32 filter_width = ArraySize(filter_dims, 1);
+ const int32 filter_height = filter_shape.Dims(1);
+ const int32 filter_width = filter_shape.Dims(2);
params.filter_row_size = params.output_depth * filter_width;
// Algorithm assumes below constraints. It is optimized for depth
@@ -3279,7 +3291,7 @@ inline void DepthwiseConv3x3Filter(
TFLITE_DCHECK(pad_width == 0 || pad_width == 1);
TFLITE_DCHECK(pad_width == pad_height);
- const int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+ const int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
const int64_t input_batch_size = params.input_row_size * params.input_height;
const int64_t output_batch_size =
params.output_row_size * params.output_height;
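MatchingDim takes over the role MatchingArraySize played for Dims<4>: check that two shapes agree on a given axis and return the shared extent. A standalone sketch of such a helper, using a simplified Shape4 stand-in rather than the TFLite implementation:

#include <cassert>
#include <cstdio>

struct Shape4 {
  int dims[4];  // NHWC.
  int Dims(int i) const { return dims[i]; }
};

// Assert that two shapes agree on the given axes and return the extent,
// analogous to MatchingDim(shape1, i1, shape2, i2) in the patch.
int MatchingDim(const Shape4& a, int ia, const Shape4& b, int ib) {
  assert(a.Dims(ia) == b.Dims(ib));
  return a.Dims(ia);
}

int main() {
  Shape4 input{{1, 32, 32, 8}};
  Shape4 output{{1, 16, 16, 8}};
  const int batches = MatchingDim(input, 0, output, 0);  // batch axis
  const int depth = MatchingDim(input, 3, output, 3);    // channel axis
  std::printf("batches=%d depth=%d\n", batches, depth);
  return 0;
}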
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index f6636acc58..ac4626bc30 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -772,6 +772,8 @@ struct DepthwiseParams {
PaddingValues padding_values;
int16 stride_width;
int16 stride_height;
+ int16 dilation_width_factor;
+ int16 dilation_height_factor;
int16 depth_multiplier;
// uint8 inference params.
// TODO(b/65838351): Use smaller types if appropriate.
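The two dilation fields added to DepthwiseParams are plumbed through from the callers but pinned to 1 by the DCHECKs in the optimized kernels above. A minimal sketch of how a caller might fill them in, using a trimmed-down hypothetical analogue of the struct rather than the real tflite::DepthwiseParams:

#include <cassert>
#include <cstdint>

// Hypothetical, trimmed-down analogue of the params struct in types.h.
struct DepthwiseParamsSketch {
  int16_t stride_width = 1;
  int16_t stride_height = 1;
  int16_t dilation_width_factor = 1;
  int16_t dilation_height_factor = 1;
  int16_t depth_multiplier = 1;
};

int main() {
  DepthwiseParamsSketch op_params;
  op_params.stride_width = 2;
  op_params.stride_height = 2;
  op_params.dilation_width_factor = 1;   // optimized path requires 1
  op_params.dilation_height_factor = 1;  // optimized path requires 1
  op_params.depth_multiplier = 1;

  // Mirrors the TFLITE_DCHECK_EQ(params.dilation_*_factor, 1) guards:
  // the optimized depthwise kernels only handle undilated filters.
  assert(op_params.dilation_width_factor == 1);
  assert(op_params.dilation_height_factor == 1);
  return 0;
}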