aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/resize_bicubic_op.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <nobody@tensorflow.org>2016-03-11 09:08:22 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-03-11 11:42:13 -0800
commitfc53648e0c0f5110bfab75a02b9dc75260b913d3 (patch)
treecaf4cf00eb9f29a22958461feb58a6628ccb94a7 /tensorflow/core/kernels/resize_bicubic_op.cc
parent625d62abef3d64ab9025cf248719b76aea3b7fc9 (diff)
Use faster floating-point rounding and floor primitives in a couple of image processing kernels. Specifically:
1) round(x) -> lrintf(x)
2) int i = static_cast<int>(floor(x)) -> int i = x; for x >= 0.

Rationale:
1) lrintf() is much faster than round() as it compiles down to a single instruction. Thanks to sesse@ for the tip.
2) Casting float to int is truncating, which gives the same result as floor() for x >= 0.

This change speeds up the bicubic resizer by ~7%. There is no benchmark for simple_distorted_bounding_box, but I verified that the change does not cause any regression on imagenet.

Benchmark                      Base (ns)    New (ns)  Improvement
------------------------------------------------------------------
BM_ResizeBicubic_8_32_3           111319      103691    +6.9%
BM_ResizeBicubic_8_128_3         1513173     1415275    +6.5%
BM_ResizeBicubic_8_512_3        23232055    21353694    +8.1%
BM_ResizeBicubic_8_1024_3       91817334    86537924    +5.7%
BM_ResizeBicubic_16_32_3          199235      184428    +7.4%
BM_ResizeBicubic_16_128_3        2946248     2744626    +6.8%
BM_ResizeBicubic_16_512_3       45957878    42832137    +6.8%
BM_ResizeBicubic_16_1024_3     184548577   170523376    +7.6%
BM_ResizeBicubic_32_32_3          432991      400042    +7.6%
BM_ResizeBicubic_32_128_3        5767107     5395839    +6.4%
BM_ResizeBicubic_32_512_3       94064648    86132128    +8.4%
BM_ResizeBicubic_32_1024_3     370615017   341913896    +7.7%

Change: 116978161
Diffstat (limited to 'tensorflow/core/kernels/resize_bicubic_op.cc')
-rw-r--r--tensorflow/core/kernels/resize_bicubic_op.cc4
1 files changed, 2 insertions, 2 deletions
diff --git a/tensorflow/core/kernels/resize_bicubic_op.cc b/tensorflow/core/kernels/resize_bicubic_op.cc
index 370786f453..f81383984b 100644
--- a/tensorflow/core/kernels/resize_bicubic_op.cc
+++ b/tensorflow/core/kernels/resize_bicubic_op.cc
@@ -62,9 +62,9 @@ inline int64 Bound(int64 val, int64 limit) {
inline void GetWeightsAndIndices(float scale, int64 out_loc, int64 limit,
std::array<float, 4>* weights,
std::array<int64, 4>* indices) {
- const int64 in_loc = floor(scale * out_loc);
+ const int64 in_loc = scale * out_loc;
const float delta = scale * out_loc - in_loc;
- const int64 offset = round(delta * kTableSize);
+ const int64 offset = lrintf(delta * kTableSize);
const float* coeffs_tab = GetCoeffsTable();
*weights = {{coeffs_tab[offset * 2 + 1], coeffs_tab[offset * 2],
coeffs_tab[(kTableSize - offset) * 2],