about summary refs log tree commit diff homepage
path: root/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h')
-rw-r--r-- tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h 80
1 files changed, 54 insertions, 26 deletions
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 4776726972..e2a1a6996d 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -5851,10 +5851,26 @@ inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims,
}
template <typename T>
-inline void Pad(const T* input_data, const Dims<4>& input_dims,
- const std::vector<int>& left_paddings,
- const std::vector<int>& right_paddings, T* output_data,
- const Dims<4>& output_dims, const int32_t pad_value) {
+void TypedMemset(void* ptr, T value, size_t num) {
+ // Fills the buffer at `ptr` with `num` consecutive copies of `value`; the
+ // region written is num * sizeof(T) bytes long.
+ //
+ // Optimization for common cases where memset() will suffice.
+ // NOTE(review): `value == 0` also holds for a floating-point -0.0, in which
+ // case memset() writes all-zero bytes (+0.0) -- confirm that is acceptable.
+ if (value == 0 || std::is_same<T, uint8_t>::value) {
+ memset(ptr, value, num * sizeof(T));
+ } else {
+ // Default implementation for cases where memset() will not preserve the
+ // bytes, e.g., typically when sizeof(T) > sizeof(uint8_t).
+ // Copies the object representation of `value` one element at a time,
+ // advancing a byte pointer by sizeof(T) after each copy.
+ char* pos = static_cast<char*>(ptr);
+ for (size_t i = 0; i < num; ++i) {
+ memcpy(pos, &value, sizeof(T));
+ pos = pos + sizeof(T);
+ }
+ }
+}
+
+template <typename T>
+inline void PadV2(const T* input_data, const Dims<4>& input_dims,
+ const std::vector<int>& left_paddings,
+ const std::vector<int>& right_paddings, T* output_data,
+ const Dims<4>& output_dims, const T pad_value) {
gemmlowp::ScopedProfilingLabel label("Pad");
TFLITE_DCHECK_EQ(left_paddings.size(), 4);
TFLITE_DCHECK_EQ(right_paddings.size(), 4);
@@ -5877,27 +5893,28 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
const int input_depth = ArraySize(input_dims, 0);
if (left_b_padding != 0) {
- memset(output_data, pad_value,
- left_b_padding * output_height * output_width * output_depth *
- sizeof(T));
+ TypedMemset<T>(
+ output_data, pad_value,
+ left_b_padding * output_height * output_width * output_depth);
}
for (int out_b = left_b_padding; out_b < output_batch - right_b_padding;
++out_b) {
if (left_h_padding != 0) {
- memset(output_data + Offset(output_dims, 0, 0, 0, out_b), pad_value,
- left_h_padding * output_width * output_depth * sizeof(T));
+ TypedMemset<T>(output_data + Offset(output_dims, 0, 0, 0, out_b),
+ pad_value, left_h_padding * output_width * output_depth);
}
for (int out_h = left_h_padding; out_h < output_height - right_h_padding;
++out_h) {
if (left_w_padding != 0) {
- memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), pad_value,
- left_w_padding * output_depth * sizeof(T));
+ TypedMemset<T>(output_data + Offset(output_dims, 0, 0, out_h, out_b),
+ pad_value, left_w_padding * output_depth);
}
for (int out_w = left_w_padding; out_w < output_width - right_w_padding;
++out_w) {
if (left_d_padding != 0) {
- memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b),
- pad_value, left_d_padding * sizeof(T));
+ TypedMemset<T>(
+ output_data + Offset(output_dims, 0, out_w, out_h, out_b),
+ pad_value, left_d_padding);
}
T* out = output_data +
@@ -5908,35 +5925,46 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
memcpy(out, in, input_depth * sizeof(T));
if (right_d_padding != 0) {
- memset(
+ TypedMemset<T>(
output_data + Offset(output_dims, output_depth - right_d_padding,
out_w, out_h, out_b),
- pad_value, right_d_padding * sizeof(T));
+ pad_value, right_d_padding);
}
}
if (right_w_padding != 0) {
- memset(
+ TypedMemset<T>(
output_data + Offset(output_dims, 0, output_width - right_w_padding,
out_h, out_b),
- pad_value, right_w_padding * output_depth * sizeof(T));
+ pad_value, right_w_padding * output_depth);
}
}
if (right_h_padding != 0) {
- memset(output_data + Offset(output_dims, 0, 0,
- output_height - right_h_padding, out_b),
- pad_value,
- right_h_padding * output_width * output_depth * sizeof(T));
+ TypedMemset<T>(
+ output_data +
+ Offset(output_dims, 0, 0, output_height - right_h_padding, out_b),
+ pad_value, right_h_padding * output_width * output_depth);
}
}
if (right_b_padding != 0) {
- memset(output_data +
- Offset(output_dims, 0, 0, 0, output_batch - right_b_padding),
- 0,
- right_b_padding * output_height * output_width * output_depth *
- sizeof(T));
+ TypedMemset<T>(
+ output_data +
+ Offset(output_dims, 0, 0, 0, output_batch - right_b_padding),
+ pad_value,
+ right_b_padding * output_height * output_width * output_depth);
}
}
+// Legacy Pad() method that casts an int32_t to T before padding.
+// Thin wrapper: every argument is forwarded unchanged to PadV2<T>() except
+// `pad_value`, which is first converted with static_cast<T>.
+// NOTE(review): the cast can change the value when T cannot represent it
+// (e.g. a T narrower than int32_t) -- confirm callers only pass in-range
+// pad values.
+template <typename T>
+inline void Pad(const T* input_data, const Dims<4>& input_dims,
+ const std::vector<int>& left_paddings,
+ const std::vector<int>& right_paddings, T* output_data,
+ const Dims<4>& output_dims, const int32_t pad_value) {
+ const T converted_pad_value = static_cast<T>(pad_value);
+ PadV2<T>(input_data, input_dims, left_paddings, right_paddings, output_data,
+ output_dims, converted_pad_value);
+}
+
template <typename T>
inline void Pad(const T* input_data, const Dims<4>& input_dims,
const std::vector<int>& left_paddings,