aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/resize_bilinear_op.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-02-07 09:34:15 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-02-07 09:54:56 -0800
commitab0426e203cd81fb2247475eccdfcec0ddb8bf39 (patch)
tree99095ea742171f9a218471eb30a0dbc26bf45ef1 /tensorflow/core/kernels/resize_bilinear_op.cc
parent661058e52460b36417573e2c5a73de9a8b9e5edb (diff)
Improve performance of ResizeBilinear:
- use pointer arithmetic instead of accessing data through Eigen single-cell expressions. - use only scale_down case, rename to resize_image. This simplifies the code, at the cost of slightly worse performance for the scale_up case with one channel (note though that special-casing single-channel case would make it 3x faster than the AFTER times below). - pull some common arithmetic out of loops manually, so that more common indexing operations don't require multiplication. - remove some assignments in inner loops. (the first of these had the largest effect). Add some more test and benchmark cases. Also fix a typo in the ResizeArea test. Change: 146795860
Diffstat (limited to 'tensorflow/core/kernels/resize_bilinear_op.cc')
-rw-r--r--tensorflow/core/kernels/resize_bilinear_op.cc299
1 files changed, 81 insertions, 218 deletions
diff --git a/tensorflow/core/kernels/resize_bilinear_op.cc b/tensorflow/core/kernels/resize_bilinear_op.cc
index 2c5aeaada4..d9cb993a4b 100644
--- a/tensorflow/core/kernels/resize_bilinear_op.cc
+++ b/tensorflow/core/kernels/resize_bilinear_op.cc
@@ -72,43 +72,19 @@ struct CachedInterpolation {
// 1-D linear iterpolation scale (see:
// https://en.wikipedia.org/wiki/Bilinear_interpolation)
float lerp;
- // How many consecutive points use the same lower & upper indices
- int consecutive;
};
-enum ImageScalePattern { SCALE_UP, SIMILAR, SCALE_DOWN };
-
-inline ImageScalePattern compute_image_scale_pattern(const int64 out_height,
- const int64 out_width,
- const int64 in_height,
- const int64 in_width) {
- if (in_height * 2 < out_height || in_width * 2 < out_width) {
- return SCALE_UP;
- } else if (out_height * 2 < in_height || out_width * 2 < in_width) {
- return SCALE_DOWN;
- } else {
- return SIMILAR;
- }
-}
-
-inline void compute_interpolation_weights(const ImageScalePattern scale_pattern,
- const int64 out_size,
+inline void compute_interpolation_weights(const int64 out_size,
const int64 in_size,
const float scale,
CachedInterpolation* interpolation) {
interpolation[out_size].lower = 0;
interpolation[out_size].upper = 0;
- interpolation[out_size].consecutive = 0;
for (int64 i = out_size - 1; i >= 0; --i) {
const float in = i * scale;
interpolation[i].lower = static_cast<int64>(in);
interpolation[i].upper = std::min(interpolation[i].lower + 1, in_size - 1);
interpolation[i].lerp = in - interpolation[i].lower;
- interpolation[i].consecutive =
- interpolation[i + 1].lower == interpolation[i].lower &&
- interpolation[i + 1].upper == interpolation[i].upper
- ? interpolation[i + 1].consecutive + 1
- : 1;
}
}
@@ -125,200 +101,97 @@ inline float compute_lerp(const float top_left, const float top_right,
}
template <typename T>
-inline float image_lerp(const T* input_image, int64 in_x_lower,
- int64 in_x_upper, float xs_lerp, int64 in_y_lower,
- int64 in_y_upper, float ys_lerp, int c) {
- const float top_left(input_image[in_y_lower + in_x_lower + c]);
- const float top_right(input_image[in_y_lower + in_x_upper + c]);
- const float bottom_left(input_image[in_y_upper + in_x_lower + c]);
- const float bottom_right(input_image[in_y_upper + in_x_upper + c]);
- return compute_lerp(top_left, top_right, bottom_left, bottom_right, xs_lerp,
- ys_lerp);
-}
-
-template <typename T>
-void scale_down_image(
+void resize_image(
typename TTypes<T, 4>::ConstTensor images, const int batch_size,
- const int64 out_height, const int64 out_width, const int channels,
+ const int64 in_height, const int64 in_width, const int64 out_height,
+ const int64 out_width, const int channels,
const std::vector<CachedInterpolation>& xs,
const std::vector<CachedInterpolation>& ys,
typename TTypes<float, 4>::Tensor output) TF_ATTRIBUTE_NOINLINE;
template <typename T>
-void scale_down_image(typename TTypes<T, 4>::ConstTensor images,
- const int batch_size, const int64 out_height,
- const int64 out_width, const int channels,
- const std::vector<CachedInterpolation>& xs_vec,
- const std::vector<CachedInterpolation>& ys,
- typename TTypes<float, 4>::Tensor output) {
- // Do not eagerly convert all input data points, as we ignore most.
+void resize_image(typename TTypes<T, 4>::ConstTensor images,
+ const int batch_size, const int64 in_height,
+ const int64 in_width, const int64 out_height,
+ const int64 out_width, const int channels,
+ const std::vector<CachedInterpolation>& xs_vec,
+ const std::vector<CachedInterpolation>& ys,
+ typename TTypes<float, 4>::Tensor output) {
+ const int64 in_row_size = in_width * channels;
+ const int64 in_batch_num_values = in_height * in_row_size;
+ const int64 out_row_size = out_width * channels;
+
+ const T* input_b_ptr = images.data();
+ const CachedInterpolation* xs = xs_vec.data();
+
if (channels == 3) {
+ float* output_y_ptr = output.data();
for (int b = 0; b < batch_size; ++b) {
- // Compute the interpolation
for (int64 y = 0; y < out_height; ++y) {
- const int64 ys_lower = ys[y].lower;
- const int64 ys_upper = ys[y].upper;
+ const T* ys_input_lower_ptr = input_b_ptr + ys[y].lower * in_row_size;
+ const T* ys_input_upper_ptr = input_b_ptr + ys[y].upper * in_row_size;
const float ys_lerp = ys[y].lerp;
- const CachedInterpolation* xs_ptr = xs_vec.data();
for (int64 x = 0; x < out_width; ++x) {
- const int64 xs_lower = xs_ptr->lower;
- const int64 xs_upper = xs_ptr->upper;
- const float xs_lerp = xs_ptr->lerp;
- xs_ptr++;
-
- const float top_left0(images(b, ys_lower, xs_lower, 0));
- const float top_right0(images(b, ys_lower, xs_upper, 0));
- const float bottom_left0(images(b, ys_upper, xs_lower, 0));
- const float bottom_right0(images(b, ys_upper, xs_upper, 0));
- const float out0 = compute_lerp(top_left0, top_right0, bottom_left0,
- bottom_right0, xs_lerp, ys_lerp);
-
- const float top_left1(images(b, ys_lower, xs_lower, 1));
- const float top_right1(images(b, ys_lower, xs_upper, 1));
- const float bottom_left1(images(b, ys_upper, xs_lower, 1));
- const float bottom_right1(images(b, ys_upper, xs_upper, 1));
- const float out1 = compute_lerp(top_left1, top_right1, bottom_left1,
- bottom_right1, xs_lerp, ys_lerp);
-
- const float top_left2(images(b, ys_lower, xs_lower, 2));
- const float top_right2(images(b, ys_lower, xs_upper, 2));
- const float bottom_left2(images(b, ys_upper, xs_lower, 2));
- const float bottom_right2(images(b, ys_upper, xs_upper, 2));
- const float out2 = compute_lerp(top_left2, top_right2, bottom_left2,
- bottom_right2, xs_lerp, ys_lerp);
-
- float* dest = &output(b, y, x, 0);
- dest[0] = out0;
- dest[1] = out1;
- dest[2] = out2;
+ const int64 xs_lower = xs[x].lower;
+ const int64 xs_upper = xs[x].upper;
+ const float xs_lerp = xs[x].lerp;
+
+ // Read channel 0.
+ const float top_left0(ys_input_lower_ptr[xs_lower + 0]);
+ const float top_right0(ys_input_lower_ptr[xs_upper + 0]);
+ const float bottom_left0(ys_input_upper_ptr[xs_lower + 0]);
+ const float bottom_right0(ys_input_upper_ptr[xs_upper + 0]);
+
+ // Read channel 1.
+ const float top_left1(ys_input_lower_ptr[xs_lower + 1]);
+ const float top_right1(ys_input_lower_ptr[xs_upper + 1]);
+ const float bottom_left1(ys_input_upper_ptr[xs_lower + 1]);
+ const float bottom_right1(ys_input_upper_ptr[xs_upper + 1]);
+
+ // Read channel 2.
+ const float top_left2(ys_input_lower_ptr[xs_lower + 2]);
+ const float top_right2(ys_input_lower_ptr[xs_upper + 2]);
+ const float bottom_left2(ys_input_upper_ptr[xs_lower + 2]);
+ const float bottom_right2(ys_input_upper_ptr[xs_upper + 2]);
+
+ // Compute output.
+ output_y_ptr[x * channels + 0] =
+ compute_lerp(top_left0, top_right0, bottom_left0, bottom_right0,
+ xs_lerp, ys_lerp);
+ output_y_ptr[x * channels + 1] =
+ compute_lerp(top_left1, top_right1, bottom_left1, bottom_right1,
+ xs_lerp, ys_lerp);
+ output_y_ptr[x * channels + 2] =
+ compute_lerp(top_left2, top_right2, bottom_left2, bottom_right2,
+ xs_lerp, ys_lerp);
}
+ output_y_ptr += out_row_size;
}
+ input_b_ptr += in_batch_num_values;
}
} else {
+ float* output_y_ptr = output.data();
for (int b = 0; b < batch_size; ++b) {
- // Compute the interpolation
for (int64 y = 0; y < out_height; ++y) {
- const CachedInterpolation* xs = xs_vec.data();
+ const T* ys_input_lower_ptr = input_b_ptr + ys[y].lower * in_row_size;
+ const T* ys_input_upper_ptr = input_b_ptr + ys[y].upper * in_row_size;
+ const float ys_lerp = ys[y].lerp;
for (int64 x = 0; x < out_width; ++x) {
+ auto xs_lower = xs[x].lower;
+ auto xs_upper = xs[x].upper;
+ auto xs_lerp = xs[x].lerp;
for (int c = 0; c < channels; ++c) {
- const float top_left(images(b, ys[y].lower, xs[x].lower, c));
- const float top_right(images(b, ys[y].lower, xs[x].upper, c));
- const float bottom_left(images(b, ys[y].upper, xs[x].lower, c));
- const float bottom_right(images(b, ys[y].upper, xs[x].upper, c));
- output(b, y, x, c) =
- compute_lerp(top_left, top_right, bottom_left, bottom_right,
- xs[x].lerp, ys[y].lerp);
- }
- }
- }
- }
- }
-}
-
-template <typename T>
-void scale_up_image(
- const T* input_image, const int batch_index, const int64 out_height,
- const int64 out_width, const int channels, const int64 in_height,
- const int64 in_width, const std::vector<CachedInterpolation>& xs,
- const std::vector<CachedInterpolation>& ys,
- typename TTypes<float, 4>::Tensor output) TF_ATTRIBUTE_NOINLINE;
-
-template <typename T>
-void scale_up_image(const T* input_image, const int batch_index,
- const int64 out_height, const int64 out_width,
- const int channels, const int64 in_height,
- const int64 in_width,
- const std::vector<CachedInterpolation>& xs,
- const std::vector<CachedInterpolation>& ys,
- typename TTypes<float, 4>::Tensor output) {
- for (int64 y = 0; y < out_height; y += ys[y].consecutive) {
- const int64 in_y_lower = ys[y].lower * in_width * channels;
- const int64 in_y_upper = ys[y].upper * in_width * channels;
- for (int64 x = 0; x < out_width; x += xs[x].consecutive) {
- const int64 in_x_lower = xs[x].lower * channels;
- const int64 in_x_upper = xs[x].upper * channels;
- for (int c = 0; c < channels; ++c) {
- const float top_left(input_image[in_y_lower + in_x_lower + c]);
- const float top_right(input_image[in_y_lower + in_x_upper + c]);
- const float bottom_left(input_image[in_y_upper + in_x_lower + c]);
- const float bottom_right(input_image[in_y_upper + in_x_upper + c]);
- for (int64 y_inner = y; y_inner < y + ys[y].consecutive; ++y_inner) {
- for (int64 x_inner = x; x_inner < x + xs[x].consecutive; ++x_inner) {
- output(batch_index, y_inner, x_inner, c) =
+ const float top_left(ys_input_lower_ptr[xs_lower + c]);
+ const float top_right(ys_input_lower_ptr[xs_upper + c]);
+ const float bottom_left(ys_input_upper_ptr[xs_lower + c]);
+ const float bottom_right(ys_input_upper_ptr[xs_upper + c]);
+ output_y_ptr[x * channels + c] =
compute_lerp(top_left, top_right, bottom_left, bottom_right,
- xs[x_inner].lerp, ys[y_inner].lerp);
+ xs_lerp, ys_lerp);
}
}
+ output_y_ptr += out_row_size;
}
- }
- }
-}
-
-template <typename T>
-void scale_similar_image(
- const T* input_image, const int b, const int64 out_height,
- const int64 out_width, const int channels, const int64 in_height,
- const int64 in_width, const std::vector<CachedInterpolation>& xs_vec,
- const std::vector<CachedInterpolation>& ys,
- typename TTypes<float, 4>::Tensor output) TF_ATTRIBUTE_NOINLINE;
-template <typename T>
-void scale_similar_image(const T* input_image, const int b,
- const int64 out_height, const int64 out_width,
- const int channels, const int64 in_height,
- const int64 in_width,
- const std::vector<CachedInterpolation>& xs_vec,
- const std::vector<CachedInterpolation>& ys,
- typename TTypes<float, 4>::Tensor output) {
- if (channels == 3) {
- // Compute the interpolation
- for (int64 y = 0; y < out_height; ++y) {
- const int64 in_y_lower = ys[y].lower * in_width * channels;
- const int64 in_y_upper = ys[y].upper * in_width * channels;
- const float ys_lerp = ys[y].lerp;
- // Similar-sized images do not have a set of inner loops.
- const CachedInterpolation* xs_ptr = xs_vec.data();
- for (int64 x = 0; x < out_width; ++x) {
- const int64 in_x_lower = xs_ptr->lower * 3;
- const int64 in_x_upper = xs_ptr->upper * 3;
- const float xs_lerp = xs_ptr->lerp;
- xs_ptr++;
-
- const float out0 =
- image_lerp(input_image, in_x_lower, in_x_upper, xs_lerp, in_y_lower,
- in_y_upper, ys_lerp, 0);
- const float out1 =
- image_lerp(input_image, in_x_lower, in_x_upper, xs_lerp, in_y_lower,
- in_y_upper, ys_lerp, 1);
- const float out2 =
- image_lerp(input_image, in_x_lower, in_x_upper, xs_lerp, in_y_lower,
- in_y_upper, ys_lerp, 2);
- float* dest = &output(b, y, x, 0);
- dest[0] = out0;
- dest[1] = out1;
- dest[2] = out2;
- }
- }
- } else {
- // Compute the interpolation
- for (int64 y = 0; y < out_height; ++y) {
- const int64 in_y_lower = ys[y].lower * in_width * channels;
- const int64 in_y_upper = ys[y].upper * in_width * channels;
- const float ys_lerp = ys[y].lerp;
- // Similar-sized images do not have a set of inner loops.
- const CachedInterpolation* xs_ptr = xs_vec.data();
- for (int64 x = 0; x < out_width; ++x) {
- const int64 in_x_lower = xs_ptr->lower * channels;
- const int64 in_x_upper = xs_ptr->upper * channels;
- const float xs_lerp = xs_ptr->lerp;
- xs_ptr++;
- for (int c = 0; c < channels; ++c) {
- const float top_left(input_image[in_y_lower + in_x_lower + c]);
- const float top_right(input_image[in_y_lower + in_x_upper + c]);
- const float bottom_left(input_image[in_y_upper + in_x_lower + c]);
- const float bottom_right(input_image[in_y_upper + in_x_upper + c]);
- output(b, y, x, c) = compute_lerp(top_left, top_right, bottom_left,
- bottom_right, xs_lerp, ys_lerp);
- }
- }
+ input_b_ptr += in_batch_num_values;
}
}
}
@@ -346,32 +219,22 @@ struct ResizeBilinear<CPUDevice, T> {
return;
}
- const ImageScalePattern scale_pattern =
- compute_image_scale_pattern(out_height, out_width, in_height, in_width);
std::vector<CachedInterpolation> ys(out_height + 1);
std::vector<CachedInterpolation> xs(out_width + 1);
// Compute the cached interpolation weights on the x and y dimensions.
- compute_interpolation_weights(scale_pattern, out_height, in_height,
- height_scale, ys.data());
- compute_interpolation_weights(scale_pattern, out_width, in_width,
- width_scale, xs.data());
-
- if (scale_pattern == SCALE_UP) {
- for (int b = 0; b < batch_size; ++b) {
- scale_up_image<T>(&images(b, 0, 0, 0), b, out_height, out_width,
- channels, in_height, in_width, xs, ys, output);
- }
- } else if (scale_pattern == SCALE_DOWN) {
- // Do not eagerly convert all input data points, as we ignore most.
- scale_down_image<T>(images, batch_size, out_height, out_width, channels,
- xs, ys, output);
- } else {
- for (int b = 0; b < batch_size; ++b) {
- scale_similar_image<T>(&images(b, 0, 0, 0), b, out_height, out_width,
- channels, in_height, in_width, xs, ys, output);
- }
+ compute_interpolation_weights(out_height, in_height, height_scale,
+ ys.data());
+ compute_interpolation_weights(out_width, in_width, width_scale, xs.data());
+
+ // Scale x interpolation weights to avoid a multiplication during iteration.
+ for (int i = 0; i < xs.size(); ++i) {
+ xs[i].lower *= channels;
+ xs[i].upper *= channels;
}
+
+ resize_image<T>(images, batch_size, in_height, in_width, out_height,
+ out_width, channels, xs, ys, output);
}
};
} // namespace functor